[llvm-branch-commits] [llvm] [AMDGPU] Add VOP1 support for gfx13 (PR #177603)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 23 07:42:01 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Mariusz Sikora (mariusz-sikora-at-amd)
<details>
<summary>Changes</summary>
---
Patch is 1004.08 KiB, truncated to 20.00 KiB below; the full version is available at https://github.com/llvm/llvm-project/pull/177603.diff
12 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp (+13-1)
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.td (+1)
- (modified) llvm/lib/Target/AMDGPU/VOP1Instructions.td (+207-153)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+5)
- (added) llvm/test/MC/AMDGPU/gfx13_asm_vop1.s (+4229)
- (added) llvm/test/MC/AMDGPU/gfx13_asm_vop1_dpp16.s (+3196)
- (added) llvm/test/MC/AMDGPU/gfx13_asm_vop1_dpp8.s (+777)
- (added) llvm/test/MC/AMDGPU/gfx13_asm_vop1_t16_err.s (+653)
- (added) llvm/test/MC/AMDGPU/gfx13_asm_vop1_t16_promote.s (+1485)
- (added) llvm/test/MC/AMDGPU/gfx13_asm_vop3_from_vop1.s (+4186)
- (added) llvm/test/MC/AMDGPU/gfx13_asm_vop3_from_vop1_dpp16.s (+3201)
- (added) llvm/test/MC/AMDGPU/gfx13_asm_vop3_from_vop1_dpp8.s (+880)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
index a22d92632ef5c..b2dfd098735a0 100644
--- a/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
+++ b/llvm/lib/Target/AMDGPU/Disassembler/AMDGPUDisassembler.cpp
@@ -609,6 +609,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX12W6496, MI, DecW, Address, CS))
break;
+ if (isGFX13() &&
+ tryDecodeInst(DecoderTableGFX1396, DecoderTableGFX13_FAKE1696, MI,
+ DecW, Address, CS))
+ break;
+
if (STI.hasFeature(AMDGPU::Feature64BitLiterals)) {
// Return 8 bytes for a potential literal.
Bytes = Bytes_.slice(4, MaxInstBytesNum - 4);
@@ -694,7 +699,9 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX12W6464, MI, QW, Address, CS))
break;
- if (isGFX13() && tryDecodeInst(DecoderTableGFX1364, MI, QW, Address, CS))
+ if (isGFX13() &&
+ tryDecodeInst(DecoderTableGFX1364, DecoderTableGFX13_FAKE1664, MI, QW,
+ Address, CS))
break;
// Reinitialize Bytes
@@ -744,6 +751,11 @@ DecodeStatus AMDGPUDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
tryDecodeInst(DecoderTableGFX1232, DecoderTableGFX12_FAKE1632, MI, DW,
Address, CS))
break;
+
+ if (isGFX13() &&
+ tryDecodeInst(DecoderTableGFX1332, DecoderTableGFX13_FAKE1632, MI, DW,
+ Address, CS))
+ break;
}
return MCDisassembler::Fail;
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 789a65bce5ec5..41074dd75b90a 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -41,6 +41,7 @@ class GFXGen<Predicate pred, string dn, string suffix, int sub> {
int Subtarget = sub;
}
+def GFX13Gen : GFXGen<isGFX13Only, "GFX13", "_gfx13", SIEncodingFamily.GFX13>;
def GFX1250Gen : GFXGen<isGFX125xOnly, "GFX1250", "_gfx1250", SIEncodingFamily.GFX1250>;
def GFX12Not12_50Gen : GFXGen<isGFX12Not12_50, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
def GFX12Gen : GFXGen<isGFX12Only, "GFX12", "_gfx12", SIEncodingFamily.GFX12>;
diff --git a/llvm/lib/Target/AMDGPU/VOP1Instructions.td b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
index a20578713958d..d8b925ebc0416 100644
--- a/llvm/lib/Target/AMDGPU/VOP1Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP1Instructions.td
@@ -953,7 +953,7 @@ class VOP1_DPP8_Gen<bits<8> op, VOP1_Pseudo ps, GFXGen Gen, VOPProfile p = ps.Pf
}
//===----------------------------------------------------------------------===//
-// GFX11, GFX12
+// GFX11, GFX12, GFX13
//===----------------------------------------------------------------------===//
multiclass VOP1Only_Real<GFXGen Gen, bits<9> op> {
@@ -1027,10 +1027,19 @@ multiclass VOP1_Realtriple_e64_with_name<GFXGen Gen, bits<9> op, string opName,
asmName>;
}
+multiclass VOP1_Realtriple_e64_with_name_gfx12_gfx13<
+ bits<9> op, string opName, string asmName> :
+ VOP1_Realtriple_e64_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Realtriple_e64_with_name<GFX13Gen, op, opName, asmName>;
+
multiclass VOP1_Real_FULL<GFXGen Gen, bits<9> op> :
VOP1_Real_e32<Gen, op>, VOP1_Realtriple_e64<Gen, op>,
VOP1_Real_dpp<Gen, op>, VOP1_Real_dpp8<Gen, op>;
+multiclass VOP1_Real_FULL_gfx1250_gfx13<bits<9> op> :
+ VOP1_Real_FULL<GFX1250Gen, op>,
+ VOP1_Real_FULL<GFX13Gen, op>;
+
multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
string asmName> {
defm NAME : VOP1_Real_e32_with_name<GFX11Gen, op, opName, asmName>,
@@ -1042,11 +1051,14 @@ multiclass VOP1_Real_NO_VOP3_with_name_gfx11<bits<9> op, string opName,
}
}
-multiclass VOP1_Real_NO_VOP3_with_name_gfx12<bits<9> op, string opName,
- string asmName> {
+multiclass VOP1_Real_NO_VOP3_with_name_gfx12_gfx13<
+ bits<9> op, string opName, string asmName> {
defm NAME : VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>,
VOP1_Real_dpp_with_name<GFX12Gen, op, opName, asmName>,
VOP1_Real_dpp8_with_name<GFX12Gen, op, opName, asmName>;
+ defm NAME : VOP1_Real_e32_with_name<GFX13Gen, op, opName, asmName>,
+ VOP1_Real_dpp_with_name<GFX13Gen, op, opName, asmName>,
+ VOP1_Real_dpp8_with_name<GFX13Gen, op, opName, asmName>;
}
multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
@@ -1056,6 +1068,11 @@ multiclass VOP1_Real_FULL_with_name<GFXGen Gen, bits<9> op, string opName,
VOP1_Real_dpp8_with_name<Gen, op, opName, asmName>,
VOP1_Realtriple_e64_with_name<Gen, op, opName, asmName>;
+multiclass VOP1_Real_FULL_with_name_gfx1250_gfx13<
+ bits<9> op, string opName, string asmName> :
+ VOP1_Real_FULL_with_name<GFX1250Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX13Gen, op, opName, asmName>;
+
multiclass VOP1_Real_NO_DPP<GFXGen Gen, bits<9> op> :
VOP1_Real_e32<Gen, op>, VOP1_Real_e64<Gen, op>;
@@ -1064,134 +1081,158 @@ multiclass VOP1_Real_with_DPP16<GFXGen Gen, bits<9> op> :
VOP1_Real_dpp<Gen, op>,
VOP3_Real_dpp_Base<Gen, {0, 1, 1, op{6-0}}>;
-multiclass VOP1_Real_FULL_t16_gfx11_gfx12<bits<9> op, string asmName,
- string opName = NAME> :
+multiclass VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<
+ bits<9> op, string asmName, string opName = NAME> :
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
- VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+ VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX13Gen, op, opName, asmName>;
+
+multiclass VOP1_Real_FULL_with_name_gfx12_gfx13<
+ bits<9> op, string opName, string asmName> :
+ VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX13Gen, op, opName, asmName>;
-multiclass VOP1_Real_FULL_with_name_gfx11_gfx12<bits<9> op, string opName,
- string asmName> :
+multiclass VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<
+ bits<9> op, string opName, string asmName> :
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
- VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>;
+ VOP1_Real_FULL_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX13Gen, op, opName, asmName>;
-multiclass VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<
+multiclass VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<
bits<9> op, string asmName = !tolower(NAME), string opName = NAME> {
defm opName#"_t16" :
- VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_t16", asmName>;
+ VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<op, opName#"_t16", asmName>;
defm opName#"_fake16":
- VOP1_Real_FULL_with_name_gfx11_gfx12<op, opName#"_fake16", asmName>;
+ VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<op, opName#"_fake16", asmName>;
}
-multiclass VOP1Only_Real_gfx11_gfx12<bits<9> op> :
- VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>;
+multiclass VOP1Only_Real_gfx11_gfx12_gfx13<bits<9> op> :
+ VOP1Only_Real<GFX11Gen, op>, VOP1Only_Real<GFX12Gen, op>,
+ VOP1Only_Real<GFX13Gen, op>;
multiclass VOP1_Real_FULL_gfx11_gfx12<bits<9> op> :
VOP1_Real_FULL<GFX11Gen, op>, VOP1_Real_FULL<GFX12Gen, op>;
-multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250<
+multiclass VOP1_Real_e32_with_name_gfx12_gfx13<bits<9> op, string opName,
+ string asmName> :
+ VOP1_Real_e32_with_name<GFX12Gen, op, opName, asmName>,
+ VOP1_Real_e32_with_name<GFX13Gen, op, opName, asmName>;
+
+multiclass VOP1_Real_FULL_t16<GFXGen Gen, bits<9> op> :
+ VOP1_Real_FULL_with_name<Gen, op, NAME,
+ !cast<VOP1_Pseudo>(!subst("_fake16", "", NAME)#"_e32").Mnemonic>;
+
+multiclass VOP1_Real_FULL_t16_and_fake16_gfx1250_gfx13<
bits<9> op, string asmName = !tolower(NAME), string opName = NAME> {
defm opName#"_t16" :
- VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_t16", asmName>;
+ VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_t16", asmName>,
+ VOP1_Real_FULL_with_name<GFX13Gen, op, opName#"_t16", asmName>;
defm opName#"_fake16":
- VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>;
+ VOP1_Real_FULL_with_name<GFX1250Gen, op, opName#"_fake16", asmName>,
+ VOP1_Real_FULL_with_name<GFX13Gen, op, opName#"_fake16", asmName>;
}
-multiclass VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250<bits<9> op, string opName,
- string asmName> :
+multiclass VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13_not_gfx1250<bits<9> op, string opName,
+ string asmName> :
VOP1_Real_FULL_with_name<GFX11Gen, op, opName, asmName>,
- VOP1_Real_FULL_with_name<GFX12Not12_50Gen, op, opName, asmName>;
+ VOP1_Real_FULL_with_name<GFX12Not12_50Gen, op, opName, asmName>,
+ VOP1_Real_FULL_with_name<GFX13Gen, op, opName, asmName>;
-multiclass VOP1_Real_OpSelIsDPP_gfx1250<bits<9> op> : VOP1_Real_e32<GFX1250Gen, op> {
+multiclass VOP1_Real_OpSelIsDPP<GFXGen Gen, bits<9> op> : VOP1_Real_e32<Gen, op> {
defvar ps = !cast<VOP_Pseudo>(NAME#"_e64");
- def _e64_gfx1250 :
- VOP3_Real_Gen<ps, GFX1250Gen>,
+ def _e64#Gen.Suffix :
+ VOP3_Real_Gen<ps, Gen>,
VOP3OpSelIsDPP_gfx12<{0, 1, 1, op{6-0}}, ps.Pfl>;
}
-defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_not_gfx1250<0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
+multiclass VOP1_Real_OpSelIsDPP_gfx1250_gfx13<bits<9> op> :
+ VOP1_Real_OpSelIsDPP<GFX1250Gen, op>,
+ VOP1_Real_OpSelIsDPP<GFX13Gen, op>;
+
+defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13_not_gfx1250<0x06c, "V_CVT_F32_FP8_OP_SEL", "v_cvt_f32_fp8">;
defm V_CVT_F32_FP8 : VOP1_Real_FULL_with_name<GFX1250Gen, 0x06c, "V_CVT_F32_FP8_gfx1250", "v_cvt_f32_fp8">;
-defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name<GFX12Gen, 0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
+defm V_CVT_F32_BF8 : VOP1_Real_FULL_with_name_gfx12_gfx13<0x06d, "V_CVT_F32_BF8_OP_SEL", "v_cvt_f32_bf8">;
-defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
-defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
-defm V_CVT_PK_F32_FP8_fake16 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
-defm V_CVT_PK_F32_FP8_t16 : VOP3_Real_with_name<GFX12Gen, 0x1ee, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
-defm V_CVT_PK_F32_BF8_fake16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">;
-defm V_CVT_PK_F32_BF8_t16 : VOP1_Real_e32_with_name<GFX12Gen, 0x06f, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">;
-defm V_CVT_PK_F32_BF8_fake16 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">;
-defm V_CVT_PK_F32_BF8_t16 : VOP3_Real_with_name<GFX12Gen, 0x1ef, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_FP8_fake16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8_t16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06e, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8_fake16 : VOP3_Real_with_name_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_fake16", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_FP8_t16 : VOP3_Real_with_name_gfx12_gfx13<0x1ee, "V_CVT_PK_F32_FP8_t16", "v_cvt_pk_f32_fp8">;
+defm V_CVT_PK_F32_BF8_fake16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8_t16 : VOP1_Real_e32_with_name_gfx12_gfx13<0x06f, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8_fake16 : VOP3_Real_with_name_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_fake16", "v_cvt_pk_f32_bf8">;
+defm V_CVT_PK_F32_BF8_t16 : VOP3_Real_with_name_gfx12_gfx13<0x1ef, "V_CVT_PK_F32_BF8_t16", "v_cvt_pk_f32_bf8">;
-defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00c,
+defm V_CVT_NEAREST_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x00c,
"V_CVT_RPI_I32_F32", "v_cvt_nearest_i32_f32">;
-defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x00d,
+defm V_CVT_FLOOR_I32_F32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x00d,
"V_CVT_FLR_I32_F32", "v_cvt_floor_i32_f32">;
-defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x039,
+defm V_CLZ_I32_U32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x039,
"V_FFBH_U32", "v_clz_i32_u32">;
-defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03a,
+defm V_CTZ_I32_B32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x03a,
"V_FFBL_B32", "v_ctz_i32_b32">;
-defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12<0x03b,
+defm V_CLS_I32 : VOP1_Real_FULL_with_name_gfx11_gfx12_gfx13<0x03b,
"V_FFBH_I32", "v_cls_i32">;
-defm V_SWAP_B16 : VOP1Only_Real_gfx11_gfx12<0x066>;
-defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12<0x067>;
-defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x01c, "v_mov_b16">;
-defm V_NOT_B16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x069>;
-defm V_CVT_I32_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06a>;
-defm V_CVT_U32_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x06b>;
-
-defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x050>;
-defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x051>;
-defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x052>;
-defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x053>;
-defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
-defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x054, "v_rcp_f16">;
-defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
-defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x055, "v_sqrt_f16">;
-defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
-defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x056, "v_rsq_f16">;
-defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
-defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x057, "v_log_f16">;
-defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
-defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x058, "v_exp_f16">;
-defm V_FREXP_MANT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x059>;
-defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05a>;
-defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
-defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05b, "v_floor_f16">;
-defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
-defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12<0x05c, "v_ceil_f16">;
-defm V_TRUNC_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05d>;
-defm V_RNDNE_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05e>;
-defm V_FRACT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x05f>;
-defm V_SIN_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x060>;
-defm V_COS_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x061>;
-defm V_SAT_PK_U8_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x062>;
-defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x063>;
-defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x064>;
-
-defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00a>;
-defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12<0x00b>;
+defm V_SWAP_B16 : VOP1Only_Real_gfx11_gfx12_gfx13<0x066>;
+defm V_PERMLANE64_B32 : VOP1Only_Real_gfx11_gfx12_gfx13<0x067>;
+defm V_MOV_B16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x01c, "v_mov_b16">;
+defm V_NOT_B16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x069>;
+defm V_CVT_I32_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x06a>;
+defm V_CVT_U32_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x06b>;
+
+defm V_CVT_F16_U16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x050>;
+defm V_CVT_F16_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x051>;
+defm V_CVT_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x052>;
+defm V_CVT_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x053>;
+defm V_RCP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x054, "v_rcp_f16">;
+defm V_RCP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x054, "v_rcp_f16">;
+defm V_SQRT_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x055, "v_sqrt_f16">;
+defm V_SQRT_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x055, "v_sqrt_f16">;
+defm V_RSQ_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x056, "v_rsq_f16">;
+defm V_RSQ_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x056, "v_rsq_f16">;
+defm V_LOG_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x057, "v_log_f16">;
+defm V_LOG_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x057, "v_log_f16">;
+defm V_EXP_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x058, "v_exp_f16">;
+defm V_EXP_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x058, "v_exp_f16">;
+defm V_FREXP_MANT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x059>;
+defm V_FREXP_EXP_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x05a>;
+defm V_FLOOR_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x05b, "v_floor_f16">;
+defm V_FLOOR_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x05b, "v_floor_f16">;
+defm V_CEIL_F16_t16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x05c, "v_ceil_f16">;
+defm V_CEIL_F16_fake16 : VOP1_Real_FULL_t16_gfx11_gfx12_gfx13<0x05c, "v_ceil_f16">;
+defm V_TRUNC_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x05d>;
+defm V_RNDNE_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x05e>;
+defm V_FRACT_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x05f>;
+defm V_SIN_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x060>;
+defm V_COS_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x061>;
+defm V_SAT_PK_U8_I16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x062>;
+defm V_CVT_NORM_I16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x063>;
+defm V_CVT_NORM_U16_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x064>;
+
+defm V_CVT_F16_F32 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x00a>;
+defm V_CVT_F32_F16 : VOP1_Real_FULL_t16_and_fake16_gfx11_gfx12_gfx13<0x00b>;
defm V_MOV_B64 : VOP1_Real_FULL <GFX1250Gen, 0x1d>;
-defm V_TANH_F32 : VOP1_Real_FULL<GFX1250Gen, 0x01e>;
-defm V_TANH_F16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x01f>;
-defm V_PERMLANE16_SWAP_B32 : VOP1_Real_OpSelIsDPP_gfx1250<0x049>;
-defm V_TANH_BF16 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x04a>;
-defm V_PRNG_B32 : VOP1_Real_FULL<GFX1250Gen, 0x04b>;
-defm V_CVT_F32_BF16_gfx1250 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x072, "v_cvt_f32_bf16">;
-defm V_SAT_PK4_I4_I8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x073>;
-defm V_SAT_PK4_U4_U8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x074>;
-defm V_CVT_PK_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x075>;
-defm V_CVT_PK_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x076>;
-defm V_CVT_F16_FP8 : VOP1_Real_FULL_t16_and_fake16_gfx1250<0x077>;
-defm V_CVT_F16_BF8 : VOP1_Real_FULL_t16_and_fake16_gfx1250...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/177603
More information about the llvm-branch-commits mailing list