[llvm] [AMDGPU][True16][MC] true16 for minimummaximum/max/min/max3/min3 (PR #124184)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 23 15:56:00 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-amdgpu
Author: Brox Chen (broxigarchen)
<details>
<summary>Changes</summary>
Add true16 support for the following GFX12 instructions:
v_minimummaximum_f16
v_maximumminimum_f16
v_maximum_f16
v_minimum_f16
v_maximum3_f16
v_minimum3_f16
---
Patch is 352.37 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/124184.diff
10 Files Affected:
- (modified) llvm/lib/Target/AMDGPU/SIInstrInfo.cpp (+29-5)
- (modified) llvm/lib/Target/AMDGPU/SIInstructions.td (+7-4)
- (modified) llvm/lib/Target/AMDGPU/VOP3Instructions.td (+12-12)
- (modified) llvm/lib/Target/AMDGPU/VOPInstructions.td (+10-3)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3.s (+246-180)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp16.s (+262-184)
- (modified) llvm/test/MC/AMDGPU/gfx12_asm_vop3_dpp8.s (+194-128)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3.txt (+492-90)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp16.txt (+461-88)
- (modified) llvm/test/MC/Disassembler/AMDGPU/gfx12_dasm_vop3_dpp8.txt (+276-60)
``````````diff
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1d98d68a2ea5d4..26f6308d9317d0 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -5573,8 +5573,12 @@ unsigned SIInstrInfo::getVALUOp(const MachineInstr &MI) const {
case AMDGPU::S_SUB_F16: return AMDGPU::V_SUB_F16_fake16_e64;
case AMDGPU::S_MIN_F16: return AMDGPU::V_MIN_F16_fake16_e64;
case AMDGPU::S_MAX_F16: return AMDGPU::V_MAX_F16_fake16_e64;
- case AMDGPU::S_MINIMUM_F16: return AMDGPU::V_MINIMUM_F16_e64;
- case AMDGPU::S_MAXIMUM_F16: return AMDGPU::V_MAXIMUM_F16_e64;
+ case AMDGPU::S_MINIMUM_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_MINIMUM_F16_t16_e64
+ : AMDGPU::V_MINIMUM_F16_fake16_e64;
+ case AMDGPU::S_MAXIMUM_F16:
+ return ST.useRealTrue16Insts() ? AMDGPU::V_MAXIMUM_F16_t16_e64
+ : AMDGPU::V_MAXIMUM_F16_fake16_e64;
case AMDGPU::S_MUL_F16: return AMDGPU::V_MUL_F16_fake16_e64;
case AMDGPU::S_CVT_PK_RTZ_F16_F32: return AMDGPU::V_CVT_PKRTZ_F16_F32_e64;
case AMDGPU::S_FMAC_F32: return AMDGPU::V_FMAC_F32_e64;
@@ -7547,9 +7551,7 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
return;
}
case AMDGPU::S_MINIMUM_F32:
- case AMDGPU::S_MAXIMUM_F32:
- case AMDGPU::S_MINIMUM_F16:
- case AMDGPU::S_MAXIMUM_F16: {
+ case AMDGPU::S_MAXIMUM_F32: {
const DebugLoc &DL = Inst.getDebugLoc();
Register NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
@@ -7566,6 +7568,28 @@ void SIInstrInfo::moveToVALUImpl(SIInstrWorklist &Worklist,
Inst.eraseFromParent();
return;
}
+ case AMDGPU::S_MINIMUM_F16:
+ case AMDGPU::S_MAXIMUM_F16: {
+ const DebugLoc &DL = Inst.getDebugLoc();
+ Register NewDst;
+ if (ST.useRealTrue16Insts())
+ NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_16RegClass);
+ else
+ NewDst = MRI.createVirtualRegister(&AMDGPU::VGPR_32RegClass);
+ MachineInstr *NewInstr = BuildMI(*MBB, Inst, DL, get(NewOpcode), NewDst)
+ .addImm(0) // src0_modifiers
+ .add(Inst.getOperand(1))
+ .addImm(0) // src1_modifiers
+ .add(Inst.getOperand(2))
+ .addImm(0) // clamp
+ .addImm(0) // omod
+ .addImm(0); // opsel0
+ MRI.replaceRegWith(Inst.getOperand(0).getReg(), NewDst);
+ legalizeOperands(*NewInstr, MDT);
+ addUsersToMoveToVALUWorklist(NewDst, MRI, Worklist);
+ Inst.eraseFromParent();
+ return;
+ }
}
if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END) {
diff --git a/llvm/lib/Target/AMDGPU/SIInstructions.td b/llvm/lib/Target/AMDGPU/SIInstructions.td
index 40a20fa9cb15ea..bf985f0d2b1720 100644
--- a/llvm/lib/Target/AMDGPU/SIInstructions.td
+++ b/llvm/lib/Target/AMDGPU/SIInstructions.td
@@ -3770,12 +3770,15 @@ let True16Predicate = UseFakeTrue16Insts in {
let OtherPredicates = [isGFX12Plus] in {
def : FPMinMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
-def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
-def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F32_e64, f32, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F32_e64, f32, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
-def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
-def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
+}
+
+let True16Predicate = UseFakeTrue16Insts, OtherPredicates = [isGFX12Plus] in {
+def : FPMinMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
+def : FPMinMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
+def : FPMinCanonMaxPat<V_MINIMUMMAXIMUM_F16_fake16_e64, f16, DivergentBinFrag<fmaximum>, fminimum_oneuse>;
+def : FPMinCanonMaxPat<V_MAXIMUMMINIMUM_F16_fake16_e64, f16, DivergentBinFrag<fminimum>, fmaximum_oneuse>;
}
// Convert a floating-point power of 2 to the integer exponent.
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 947ac5c27620f0..0de12e43fd36ea 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -170,8 +170,8 @@ defm V_MUL_HI_I32 : VOP3Inst <"v_mul_hi_i32", V_MUL_PROF<VOP_I32_I32_I32>, mulhs
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0, AddedComplexity = 1 in {
defm V_MINIMUM_F32 : VOP3Inst <"v_minimum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fminimum>>;
defm V_MAXIMUM_F32 : VOP3Inst <"v_maximum_f32", VOP3_Profile<VOP_F32_F32_F32>, DivergentBinFrag<fmaximum>>;
-defm V_MINIMUM_F16 : VOP3Inst <"v_minimum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fminimum>>;
-defm V_MAXIMUM_F16 : VOP3Inst <"v_maximum_f16", VOP3_Profile<VOP_F16_F16_F16>, DivergentBinFrag<fmaximum>>;
+defm V_MINIMUM_F16 : VOP3Inst_t16 <"v_minimum_f16", VOP_F16_F16_F16, DivergentBinFrag<fminimum>>;
+defm V_MAXIMUM_F16 : VOP3Inst_t16 <"v_maximum_f16", VOP_F16_F16_F16, DivergentBinFrag<fmaximum>>;
let SchedRW = [WriteDoubleAdd] in {
defm V_MINIMUM_F64 : VOP3Inst <"v_minimum_f64", VOP3_Profile<VOP_F64_F64_F64>, fminimum>;
@@ -634,8 +634,8 @@ defm V_MAX3_I16 : VOP3Inst_t16 <"v_max3_i16", VOP_I16_I16_I16_I16, AMDGPUsmax3>;
defm V_MAX3_U16 : VOP3Inst_t16 <"v_max3_u16", VOP_I16_I16_I16_I16, AMDGPUumax3>;
let SubtargetPredicate = HasMinimum3Maximum3F16, ReadsModeReg = 0 in {
- defm V_MINIMUM3_F16 : VOP3Inst <"v_minimum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfminimum3>;
- defm V_MAXIMUM3_F16 : VOP3Inst <"v_maximum3_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>, AMDGPUfmaximum3>;
+ defm V_MINIMUM3_F16 : VOP3Inst_t16 <"v_minimum3_f16", VOP_F16_F16_F16_F16, AMDGPUfminimum3>;
+ defm V_MAXIMUM3_F16 : VOP3Inst_t16 <"v_maximum3_f16", VOP_F16_F16_F16_F16, AMDGPUfmaximum3>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
defm V_ADD_I16 : VOP3Inst_t16 <"v_add_i16", VOP_I16_I16_I16>;
@@ -1440,8 +1440,8 @@ let SubtargetPredicate = HasF32ToF16BF16ConversionSRInsts in {
let SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0 in {
defm V_MAXIMUMMINIMUM_F32 : VOP3Inst<"v_maximumminimum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
defm V_MINIMUMMAXIMUM_F32 : VOP3Inst<"v_minimummaximum_f32", VOP3_Profile<VOP_F32_F32_F32_F32>>;
- defm V_MAXIMUMMINIMUM_F16 : VOP3Inst<"v_maximumminimum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
- defm V_MINIMUMMAXIMUM_F16 : VOP3Inst<"v_minimummaximum_f16", VOP3_Profile<VOP_F16_F16_F16_F16, VOP3_OPSEL>>;
+ defm V_MAXIMUMMINIMUM_F16 : VOP3Inst_t16<"v_maximumminimum_f16", VOP_F16_F16_F16_F16>;
+ defm V_MINIMUMMAXIMUM_F16 : VOP3Inst_t16<"v_minimummaximum_f16", VOP_F16_F16_F16_F16>;
} // End SubtargetPredicate = isGFX12Plus, ReadsModeReg = 0
let SubtargetPredicate = HasDot9Insts, IsDOT=1 in {
@@ -1591,8 +1591,8 @@ defm V_MIN3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x22b, "v_min3_
defm V_MAX3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x22c, "v_max3_num_f16", "V_MAX3_F16", "v_max3_f16">;
defm V_MINIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22d>;
defm V_MAXIMUM3_F32 : VOP3Only_Realtriple_gfx12<0x22e>;
-defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x22f>;
-defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_gfx12<0x230>;
+defm V_MINIMUM3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x22f, "v_minimum3_f16">;
+defm V_MAXIMUM3_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x230, "v_maximum3_f16">;
defm V_MED3_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x231, "V_MED3_F32", "v_med3_num_f32">;
defm V_MED3_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x232, "v_med3_num_f16", "V_MED3_F16", "v_med3_f16">;
defm V_MINMAX_NUM_F32 : VOP3_Realtriple_with_name_gfx12<0x268, "V_MINMAX_F32", "v_minmax_num_f32">;
@@ -1601,8 +1601,8 @@ defm V_MINMAX_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x26a, "v_minma
defm V_MAXMIN_NUM_F16 : VOP3_Realtriple_t16_and_fake16_gfx12<0x26b, "v_maxmin_num_f16", "V_MAXMIN_F16", "v_maxmin_f16">;
defm V_MINIMUMMAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26c>;
defm V_MAXIMUMMINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x26d>;
-defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26e>;
-defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x26f>;
+defm V_MINIMUMMAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x26e, "v_minimummaximum_f16">;
+defm V_MAXIMUMMINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x26f, "v_maximumminimum_f16">;
defm V_S_EXP_F32 : VOP3Only_Real_Base_gfx12<0x280>;
defm V_S_EXP_F16 : VOP3Only_Real_Base_gfx12<0x281>;
defm V_S_LOG_F32 : VOP3Only_Real_Base_gfx12<0x282>;
@@ -1619,8 +1619,8 @@ defm V_MINIMUM_F64 : VOP3Only_Real_Base_gfx12<0x341>;
defm V_MAXIMUM_F64 : VOP3Only_Real_Base_gfx12<0x342>;
defm V_MINIMUM_F32 : VOP3Only_Realtriple_gfx12<0x365>;
defm V_MAXIMUM_F32 : VOP3Only_Realtriple_gfx12<0x366>;
-defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x367>;
-defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_gfx12<0x368>;
+defm V_MINIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x367, "v_minimum_f16">;
+defm V_MAXIMUM_F16 : VOP3Only_Realtriple_t16_and_fake16_gfx12<0x368, "v_maximum_f16">;
defm V_PERMLANE16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x30f>;
defm V_PERMLANEX16_VAR_B32 : VOP3Only_Real_Base_gfx12<0x310>;
diff --git a/llvm/lib/Target/AMDGPU/VOPInstructions.td b/llvm/lib/Target/AMDGPU/VOPInstructions.td
index 3b5358b737aa4c..eb18cabe368ce0 100644
--- a/llvm/lib/Target/AMDGPU/VOPInstructions.td
+++ b/llvm/lib/Target/AMDGPU/VOPInstructions.td
@@ -1947,9 +1947,6 @@ multiclass VOP3Only_Realtriple_gfx12<bits<10> op, bit isSingle = 0> :
multiclass VOP3Only_Real_Base_gfx12<bits<10> op> :
VOP3_Real_Base<GFX12Gen, op, NAME, 1/*IsSingle*/>;
-multiclass VOP3Only_Realtriple_t16_gfx12<bits<10> op> :
- VOP3Only_Realtriple<GFX12Gen, op>;
-
multiclass VOP3_Realtriple_t16_gfx12<bits<10> op, string asmName, string opName = NAME,
string pseudo_mnemonic = "", bit isSingle = 0> :
VOP3_Realtriple_with_name<GFX12Gen, op, opName, asmName, pseudo_mnemonic, isSingle>;
@@ -1960,6 +1957,16 @@ multiclass VOP3_Realtriple_t16_and_fake16_gfx12<bits<10> op, string asmName, str
defm _fake16:VOP3_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic, isSingle>;
}
+multiclass VOP3Only_Realtriple_t16_gfx12<bits<10> op, string asmName,
+ string opName = NAME, string pseudo_mnemonic = "">
+ : VOP3_Realtriple_t16_gfx12<op, asmName, opName, pseudo_mnemonic, 1>;
+
+multiclass VOP3Only_Realtriple_t16_and_fake16_gfx12<bits<10> op, string asmName,
+ string opName = NAME, string pseudo_mnemonic = ""> {
+ defm _t16 : VOP3Only_Realtriple_t16_gfx12<op, asmName, opName#"_t16", pseudo_mnemonic>;
+ defm _fake16 : VOP3Only_Realtriple_t16_gfx12<op, asmName, opName#"_fake16", pseudo_mnemonic>;
+}
+
multiclass VOP3be_Real_with_name_gfx12<bits<10> op, string opName,
string asmName, bit isSingle = 0> {
defvar ps = !cast<VOP3_Pseudo>(opName#"_e64");
diff --git a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
index cd4ed2b9458e6c..ff24cbbbd2bea0 100644
--- a/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx12_asm_vop3.s
@@ -6445,101 +6445,119 @@ v_maximum_f32 v5, -src_scc, |vcc_lo|
v_maximum_f32 v255, -|0xaf123456|, -|vcc_hi|
// GFX12: v_maximum_f32 v255, -|0xaf123456|, -|vcc_hi| ; encoding: [0xff,0x03,0x66,0xd7,0xff,0xd6,0x00,0x60,0x56,0x34,0x12,0xaf]
-v_minimum_f16 v5, v1, v2
-// GFX12: v_minimum_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00]
+v_minimum_f16 v5.l, v1.l, v2.l
+// GFX12: v_minimum_f16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x05,0x02,0x00]
-v_minimum_f16 v5, v255, v255
-// GFX12: v_minimum_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00]
+v_minimum_f16 v5.l, v255.l, v255.l
+// GFX12: v_minimum_f16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x67,0xd7,0xff,0xff,0x03,0x00]
-v_minimum_f16 v5, s1, s2
-// GFX12: v_minimum_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00]
+v_minimum_f16 v5.l, s1, s2
+// GFX12: v_minimum_f16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x67,0xd7,0x01,0x04,0x00,0x00]
-v_minimum_f16 v5, s105, s105
-// GFX12: v_minimum_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00]
+v_minimum_f16 v5.l, s105, s105
+// GFX12: v_minimum_f16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x67,0xd7,0x69,0xd2,0x00,0x00]
-v_minimum_f16 v5, vcc_lo, ttmp15
-// GFX12: v_minimum_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00]
+v_minimum_f16 v5.l, vcc_lo, ttmp15
+// GFX12: v_minimum_f16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x67,0xd7,0x6a,0xf6,0x00,0x00]
-v_minimum_f16 v5, vcc_hi, 0xaf12
-// GFX12: v_minimum_f16 v5, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00]
+v_minimum_f16 v5.l, vcc_hi, 0xaf12
+// GFX12: v_minimum_f16 v5.l, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x67,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00]
-v_minimum_f16 v5, ttmp15, src_scc
-// GFX12: v_minimum_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00]
+v_minimum_f16 v5.l, ttmp15, src_scc
+// GFX12: v_minimum_f16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x67,0xd7,0x7b,0xfa,0x01,0x00]
-v_minimum_f16 v5, m0, 0.5
-// GFX12: v_minimum_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00]
+v_minimum_f16 v5.l, m0, 0.5
+// GFX12: v_minimum_f16 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x67,0xd7,0x7d,0xe0,0x01,0x00]
-v_minimum_f16 v5, exec_lo, -1
-// GFX12: v_minimum_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00]
+v_minimum_f16 v5.l, exec_lo, -1
+// GFX12: v_minimum_f16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x67,0xd7,0x7e,0x82,0x01,0x00]
-v_minimum_f16 v5, |exec_hi|, null
-// GFX12: v_minimum_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00]
+v_minimum_f16 v5.l, |exec_hi|, null
+// GFX12: v_minimum_f16 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x67,0xd7,0x7f,0xf8,0x00,0x00]
-v_minimum_f16 v5, null, exec_lo
-// GFX12: v_minimum_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00]
+v_minimum_f16 v5.l, null, exec_lo
+// GFX12: v_minimum_f16 v5.l, null, exec_lo ; encoding: [0x05,0x00,0x67,0xd7,0x7c,0xfc,0x00,0x00]
-v_minimum_f16 v5, -1, exec_hi
-// GFX12: v_minimum_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00]
+v_minimum_f16 v5.l, -1, exec_hi
+// GFX12: v_minimum_f16 v5.l, -1, exec_hi ; encoding: [0x05,0x00,0x67,0xd7,0xc1,0xfe,0x00,0x00]
-v_minimum_f16 v5, 0.5, -m0
-// GFX12: v_minimum_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40]
+v_minimum_f16 v5.l, 0.5, -m0
+// GFX12: v_minimum_f16 v5.l, 0.5, -m0 ; encoding: [0x05,0x00,0x67,0xd7,0xf0,0xfa,0x00,0x40]
-v_minimum_f16 v5, -src_scc, |vcc_lo|
-// GFX12: v_minimum_f16 v5, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20]
+v_minimum_f16 v5.l, -src_scc, |vcc_lo|
+// GFX12: v_minimum_f16 v5.l, -src_scc, |vcc_lo| ; encoding: [0x05,0x02,0x67,0xd7,0xfd,0xd4,0x00,0x20]
-v_minimum_f16 v255, -|0xaf12|, -|vcc_hi|
-// GFX12: v_minimum_f16 v255, -|0xaf12|, -|vcc_hi| ; encoding: [0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00]
+v_minimum_f16 v255.l, -|0xaf12|, -|vcc_hi|
+// GFX12: v_minimum_f16 v255.l, -|0xaf12|, -|vcc_hi| ; encoding: [0xff,0x03,0x67,0xd7,0xff,0xd6,0x00,0x60,0x12,0xaf,0x00,0x00]
-v_minimum_f16 v205, v201, v200
-// GFX12: v_minimum_f16 v205, v201, v200 ; encoding: [0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00]
+v_minimum_f16 v205.l, v201.l, v200.l
+// GFX12: v_minimum_f16 v205.l, v201.l, v200.l ; encoding: [0xcd,0x00,0x67,0xd7,0xc9,0x91,0x03,0x00]
-v_maximum_f16 v5, v1, v2
-// GFX12: v_maximum_f16 v5, v1, v2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00]
+v_minimum_f16 v5.l, v1.h, v2.l
+// GFX12: v_minimum_f16 v5.l, v1.h, v2.l op_sel:[1,0,0] ; encoding: [0x05,0x08,0x67,0xd7,0x01,0x05,0x02,0x00]
-v_maximum_f16 v5, v255, v255
-// GFX12: v_maximum_f16 v5, v255, v255 ; encoding: [0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00]
+v_minimum_f16 v5.l, v255.l, v255.h
+// GFX12: v_minimum_f16 v5.l, v255.l, v255.h op_sel:[0,1,0] ; encoding: [0x05,0x10,0x67,0xd7,0xff,0xff,0x03,0x00]
-v_maximum_f16 v5, s1, s2
-// GFX12: v_maximum_f16 v5, s1, s2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00]
+v_minimum_f16 v255.h, 0xfe0b, vcc_hi
+// GFX12: v_minimum_f16 v255.h, 0xfe0b, vcc_hi op_sel:[0,0,1] ; encoding: [0xff,0x40,0x67,0xd7,0xff,0xd6,0x00,0x00,0x0b,0xfe,0x00,0x00]
-v_maximum_f16 v5, s105, s105
-// GFX12: v_maximum_f16 v5, s105, s105 ; encoding: [0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00]
+v_maximum_f16 v5.l, v1.l, v2.l
+// GFX12: v_maximum_f16 v5.l, v1.l, v2.l ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x05,0x02,0x00]
-v_maximum_f16 v5, vcc_lo, ttmp15
-// GFX12: v_maximum_f16 v5, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00]
+v_maximum_f16 v5.l, v255.l, v255.l
+// GFX12: v_maximum_f16 v5.l, v255.l, v255.l ; encoding: [0x05,0x00,0x68,0xd7,0xff,0xff,0x03,0x00]
-v_maximum_f16 v5, vcc_hi, 0xaf12
-// GFX12: v_maximum_f16 v5, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00]
+v_maximum_f16 v5.l, s1, s2
+// GFX12: v_maximum_f16 v5.l, s1, s2 ; encoding: [0x05,0x00,0x68,0xd7,0x01,0x04,0x00,0x00]
-v_maximum_f16 v5, ttmp15, src_scc
-// GFX12: v_maximum_f16 v5, ttmp15, src_scc ; encoding: [0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00]
+v_maximum_f16 v5.l, s105, s105
+// GFX12: v_maximum_f16 v5.l, s105, s105 ; encoding: [0x05,0x00,0x68,0xd7,0x69,0xd2,0x00,0x00]
-v_maximum_f16 v5, m0, 0.5
-// GFX12: v_maximum_f16 v5, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00]
+v_maximum_f16 v5.l, vcc_lo, ttmp15
+// GFX12: v_maximum_f16 v5.l, vcc_lo, ttmp15 ; encoding: [0x05,0x00,0x68,0xd7,0x6a,0xf6,0x00,0x00]
-v_maximum_f16 v5, exec_lo, -1
-// GFX12: v_maximum_f16 v5, exec_lo, -1 ; encoding: [0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00]
+v_maximum_f16 v5.l, vcc_hi, 0xaf12
+// GFX12: v_maximum_f16 v5.l, vcc_hi, 0xaf12 ; encoding: [0x05,0x00,0x68,0xd7,0x6b,0xfe,0x01,0x00,0x12,0xaf,0x00,0x00]
-v_maximum_f16 v5, |exec_hi|, null
-// GFX12: v_maximum_f16 v5, |exec_hi|, null ; encoding: [0x05,0x01,0x68,0xd7,0x7f,0xf8,0x00,0x00]
+v_maximum_f16 v5.l, ttmp15, src_scc
+// GFX12: v_maximum_f16 v5.l, ttmp15, src_scc ; encoding: [0x05,0x00,0x68,0xd7,0x7b,0xfa,0x01,0x00]
-v_maximum_f16 v5, null, exec_lo
-// GFX12: v_maximum_f16 v5, null, exec_lo ; encoding: [0x05,0x00,0x68,0xd7,0x7c,0xfc,0x00,0x00]
+v_maximum_f16 v5.l, m0, 0.5
+// GFX12: v_maximum_f16 v5.l, m0, 0.5 ; encoding: [0x05,0x00,0x68,0xd7,0x7d,0xe0,0x01,0x00]
-v_maximum_f16 v5, -1, exec_hi
-// GFX12: v_maximum_f16 v5, -1, exec_hi ; encoding: [0x05,0x00,0x68,0xd7,0xc1,0xfe,0x00,0x00]
+v_maximum_f16 v5.l, exec_lo, -1
+// GFX12: v_maximum_f16 v5.l, exec_lo, -1 ; encoding: [0x05,0x00,0x68,0xd7,0x7e,0x82,0x01,0x00]
-v_maximum_f16 v5, 0.5, -m0
-// GFX12: v_maximum_f16 v5, 0.5, -m0 ; encoding: [0x05,0x00,0x68,0xd7,0xf0,0xfa,0x00,0x40]
+v_maximum_f16 v5.l, |exec_hi|, null
+// GFX12: v_maximum_f16 v5.l, |exec_hi|, null ; encoding: [0x05,0x01,0x...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/124184
More information about the llvm-commits mailing list