[llvm] bbfbec9 - [AMDGPU] Enable OMod on more VOP3 instructions

Joe Nash via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 7 10:30:44 PST 2022


Author: Joe Nash
Date: 2022-12-07T13:30:33-05:00
New Revision: bbfbec94b10699d473c106d85d5a48ff5d69e721

URL: https://github.com/llvm/llvm-project/commit/bbfbec94b10699d473c106d85d5a48ff5d69e721
DIFF: https://github.com/llvm/llvm-project/commit/bbfbec94b10699d473c106d85d5a48ff5d69e721.diff

LOG: [AMDGPU] Enable OMod on more VOP3 instructions

OMod was disabled whenever OpSel was enabled, but that restriction is
stricter than necessary. Any VOP3 instruction with a floating-point
destination can use OMod; only VOP3P profiles still have it disabled.

On GFX11, FMAC_F16_e64 can use op_sel.
Previously, SIFoldOperands and convertToThreeAddress were only accidentally
correct: when converting V_FMAC_F16_e64 to V_FMA_F16_gfx9_e64, they
reinterpreted the zero OMod operand of the former as the OpSel operand of
the latter. Now both explicitly add an op_sel operand when the new opcode
has one.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D139469

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
    llvm/lib/Target/AMDGPU/SIInstrInfo.td
    llvm/lib/Target/AMDGPU/VOP2Instructions.td
    llvm/lib/Target/AMDGPU/VOP3Instructions.td
    llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir
    llvm/test/CodeGen/AMDGPU/commute-vop3.mir
    llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
    llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir
    llvm/test/CodeGen/AMDGPU/omod.ll
    llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir
    llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
    llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s
    llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
    llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
index 979f335d770c5..5f1a4c1b31d07 100644
--- a/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
+++ b/llvm/lib/Target/AMDGPU/SIFoldOperands.cpp
@@ -14,6 +14,7 @@
 #include "SIMachineFunctionInfo.h"
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineOperand.h"
 
 #define DEBUG_TYPE "si-fold-operands"
 using namespace llvm;
@@ -340,6 +341,9 @@ bool SIFoldOperands::tryAddToFoldList(SmallVectorImpl<FoldCandidate> &FoldList,
       // Check if changing this to a v_mad_{f16, f32} instruction will allow us
       // to fold the operand.
       MI->setDesc(TII->get(NewOpc));
+      if (!AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::op_sel) &&
+          AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel))
+        MI->addOperand(MachineOperand::CreateImm(0));
       bool FoldAsMAD = tryAddToFoldList(FoldList, MI, OpNo, OpToFold);
       if (FoldAsMAD) {
         MI->untieRegOperand(OpNo);

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 0e4f2b02adb4a..c14b8df1f3902 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3464,6 +3464,7 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
       getNamedOperand(MI, AMDGPU::OpName::src2_modifiers);
   const MachineOperand *Clamp = getNamedOperand(MI, AMDGPU::OpName::clamp);
   const MachineOperand *Omod = getNamedOperand(MI, AMDGPU::OpName::omod);
+  const MachineOperand *OpSel = getNamedOperand(MI, AMDGPU::OpName::op_sel);
 
   if (!Src0Mods && !Src1Mods && !Src2Mods && !Clamp && !Omod && !IsF64 &&
       !IsLegacy &&
@@ -3574,6 +3575,8 @@ MachineInstr *SIInstrInfo::convertToThreeAddress(MachineInstr &MI,
             .add(*Src2)
             .addImm(Clamp ? Clamp->getImm() : 0)
             .addImm(Omod ? Omod->getImm() : 0);
+  if (AMDGPU::hasNamedOperand(NewOpc, AMDGPU::OpName::op_sel))
+    MIB.addImm(OpSel ? OpSel->getImm() : 0);
   updateLiveVariables(LV, MI, *MIB);
   if (LIS)
     LIS->ReplaceMachineInstrInMaps(MI, *MIB);

diff  --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.td b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
index 2671cc9e70ad9..6e13074aa38a1 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.td
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.td
@@ -2160,6 +2160,7 @@ class getAsmVOP3P <int NumSrcArgs, bit HasModifiers,
 
 class getAsmVOP3OpSel <int NumSrcArgs,
                        bit HasClamp,
+                       bit HasOMod,
                        bit Src0HasMods,
                        bit Src1HasMods,
                        bit Src2HasMods> {
@@ -2182,7 +2183,7 @@ class getAsmVOP3OpSel <int NumSrcArgs,
   string src2 = !if(Src2HasMods, fsrc2, isrc2);
 
   string clamp = !if(HasClamp, "$clamp", "");
-  string omod = "";
+  string omod = !if(HasOMod, "$omod", "");
   string ret = dst#", "#src0#src1#src2#"$op_sel"#clamp#omod;
 }
 
@@ -2459,6 +2460,12 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field Operand Src1ModSDWA = getSrcModSDWA<Src1VT>.ret;
 
 
+  field bit IsMAI = 0;
+  field bit IsVOP3P = 0;
+  field bit IsDOT = 0;
+  field bit IsSingle = 0;
+  field bit IsWMMA = 0;
+
   field bit HasDst = !ne(DstVT.Value, untyped.Value);
   field bit HasDst32 = HasDst;
   field bit EmitDst = HasDst; // force dst encoding, see v_movreld_b32 special case
@@ -2486,7 +2493,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
 
   field bit IsPacked = isPackedType<Src0VT>.ret;
   field bit HasOpSel = IsPacked;
-  field bit HasOMod = !if(HasOpSel, 0, isFloatType<DstVT>.ret);
+  field bit HasOMod = !if(IsVOP3P, 0, isFloatType<DstVT>.ret);
   field bit HasSDWAOMod = isFloatType<DstVT>.ret;
 
   field bit HasModifiers = !or(isModifierType<Src0VT>.ret,
@@ -2508,12 +2515,6 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field bit HasExtSDWA9 = HasExtSDWA;
   field int NeedPatGen = PatGenMode.NoPattern;
 
-  field bit IsMAI = 0;
-  field bit IsVOP3P = 0;
-  field bit IsDOT = 0;
-  field bit IsSingle = 0;
-  field bit IsWMMA = 0;
-
   field Operand Src0PackedMod = !if(HasSrc0FloatMods, PackedF16InputMods, PackedI16InputMods);
   field Operand Src1PackedMod = !if(HasSrc1FloatMods, PackedF16InputMods, PackedI16InputMods);
   field Operand Src2PackedMod = !if(HasSrc2FloatMods, PackedF16InputMods, PackedI16InputMods);
@@ -2574,6 +2575,7 @@ class VOPProfile <list<ValueType> _ArgVT, bit _EnableClamp = 0> {
   field string AsmVOP3P = getAsmVOP3P<NumSrcArgs, HasModifiers, HasClamp, HasOpSel>.ret;
   field string AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs,
                                               HasClamp,
+                                              HasOMod,
                                               HasSrc0FloatMods,
                                               HasSrc1FloatMods,
                                               HasSrc2FloatMods>.ret;

diff  --git a/llvm/lib/Target/AMDGPU/VOP2Instructions.td b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
index 1ba50d9402a2b..8d4676e859555 100644
--- a/llvm/lib/Target/AMDGPU/VOP2Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP2Instructions.td
@@ -424,9 +424,9 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
                     dpp_ctrl:$dpp_ctrl, row_mask:$row_mask,
                     bank_mask:$bank_mask, bound_ctrl:$bound_ctrl);
   let InsDPP16 = !con(InsDPP, (ins FI:$fi));
-  let InsVOP3Base = getIns64<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
+  let InsVOP3Base = getInsVOP3Base<Src0VOP3DPP, Src1VOP3DPP, RegisterOperand<VGPR_32>, 3,
                        0, HasModifiers, HasModifiers, HasOMod,
-                       Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod>.ret;
+                       Src0ModVOP3DPP, Src1ModVOP3DPP, Src2Mod, HasOpSel, 0/*IsVOP3P*/>.ret;
   // We need a dummy src2 tied to dst to track the use of that register for s_delay_alu
   let InsVOPDX = (ins Src0RC32:$src0X, Src1RC32:$vsrc1X, VGPRSrc_32:$src2X);
   let InsVOPDXDeferred =
@@ -473,6 +473,9 @@ class VOP_MAC <ValueType vt0, ValueType vt1=vt0> : VOPProfile <[vt0, vt1, vt1, v
 def VOP_MAC_F16 : VOP_MAC <f16>;
 def VOP_MAC_F16_t16 : VOP_MAC <f16> {
   let IsTrue16 = 1;
+  let HasOpSel = 1;
+  let AsmVOP3OpSel = getAsmVOP3OpSel<2/*NumSrcArgs*/, HasClamp, HasOMod,
+                        HasSrc0FloatMods, HasSrc1FloatMods, HasSrc2FloatMods>.ret;
   let DstRC = VOPDstOperand<VGPR_32_Lo128>;
   let DstRC64 = VOPDstOperand<VGPR_32>;
   let Src1RC32 = VGPRSrc_32_Lo128;

diff  --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index feeef35357e61..72aeb2e129b4c 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -502,7 +502,7 @@ def VOP3_CVT_SR_F8_F32_Profile : VOP3_Profile<VOPProfile<[i32, f32, i32, f32]>,
   let HasSrc2 = 0;
   let HasSrc2Mods = 1;
   let AsmVOP3OpSel = !subst(", $src2_modifiers", "",
-                            getAsmVOP3OpSel<3, HasClamp,
+                            getAsmVOP3OpSel<3, HasClamp, HasOMod,
                                             HasSrc0FloatMods, HasSrc1FloatMods,
                                             HasSrc2FloatMods>.ret);
   let HasExtVOP3DPP = 0;
@@ -774,7 +774,7 @@ class VOP3_DOT_Profile<VOPProfile P, VOP3Features Features = VOP3_REGULAR> : VOP
   let InsVOP3OpSel = getInsVOP3OpSel<Src0RC64, Src1RC64, Src2RC64, NumSrcArgs,
                                      HasClamp, HasOMod, FP16InputMods,
                                      FP16InputMods, FP16InputMods>.ret;
-  let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, 1, 1, 1>.ret;
+  let AsmVOP3OpSel = getAsmVOP3OpSel<NumSrcArgs, HasClamp, HasOMod, 1, 1, 1>.ret;
 }
 
 let SubtargetPredicate = isGFX11Plus in {

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir
index 42fd589ec74b6..4a1883f827b30 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-amdgcn.fmed3.s16.mir
@@ -21,7 +21,7 @@ body: |
     ; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
-    ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+    ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
     ; GCN: S_ENDPGM 0, implicit %6
     %0:vgpr(s32) = COPY $vgpr0
     %1:vgpr(s32) = COPY $vgpr1
@@ -48,7 +48,7 @@ body: |
     ; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr0
     ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr0
     ; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr1
-    ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+    ; GCN: %6:vgpr_32 = nofpexcept V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
     ; GCN: S_ENDPGM 0, implicit %6
     %0:sgpr(s32) = COPY $sgpr0
     %1:vgpr(s32) = COPY $vgpr0

diff  --git a/llvm/test/CodeGen/AMDGPU/commute-vop3.mir b/llvm/test/CodeGen/AMDGPU/commute-vop3.mir
index 271a87cab25e2..bea113e44adc0 100644
--- a/llvm/test/CodeGen/AMDGPU/commute-vop3.mir
+++ b/llvm/test/CodeGen/AMDGPU/commute-vop3.mir
@@ -17,8 +17,8 @@ body: |
     ; GFX9-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX9-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX9-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
-    ; GFX9-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GFX9-NEXT: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+    ; GFX9-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX9-NEXT: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX9-NEXT: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
     ; GFX9-NEXT: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX9-NEXT: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
@@ -31,8 +31,8 @@ body: |
     ; GFX10-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
     ; GFX10-NEXT: [[COPY2:%[0-9]+]]:vgpr_32 = COPY $vgpr2
     ; GFX10-NEXT: [[V_XOR3_B32_e64_:%[0-9]+]]:vgpr_32 = V_XOR3_B32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
-    ; GFX10-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
-    ; GFX10-NEXT: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, implicit $mode, implicit $exec
+    ; GFX10-NEXT: [[V_MED3_F16_e64_:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY]], 0, [[COPY1]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
+    ; GFX10-NEXT: [[V_MED3_F16_e64_1:%[0-9]+]]:vgpr_32 = V_MED3_F16_e64 0, [[COPY1]], 0, [[COPY]], 0, [[COPY2]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX10-NEXT: [[V_MAX3_I32_e64_:%[0-9]+]]:vgpr_32 = V_MAX3_I32_e64 [[COPY]], [[COPY1]], [[COPY2]], implicit $exec
     ; GFX10-NEXT: [[V_SAD_HI_U8_e64_:%[0-9]+]]:vgpr_32 = V_SAD_HI_U8_e64 [[COPY]], [[COPY1]], [[COPY2]], 0, implicit $exec
     ; GFX10-NEXT: [[V_XAD_U32_e64_:%[0-9]+]]:vgpr_32 = V_XAD_U32_e64 [[COPY]], [[COPY1]], 0, implicit $exec
@@ -44,8 +44,8 @@ body: |
     %3:vgpr_32 = V_XOR3_B32_e64 %0, %1, %2, implicit $exec
     %4:vgpr_32 = V_XOR3_B32_e64 %1, %0, %2, implicit $exec
     ; Insts with MayRaiseFPException do not get CSE
-    %5:vgpr_32 = V_MED3_F16_e64 0, %0, 0, %1, 0, %2, 0, 0, implicit $mode, implicit $exec
-    %6:vgpr_32 = V_MED3_F16_e64 0, %1, 0, %0, 0, %2, 0, 0, implicit $mode, implicit $exec
+    %5:vgpr_32 = V_MED3_F16_e64 0, %0, 0, %1, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
+    %6:vgpr_32 = V_MED3_F16_e64 0, %1, 0, %0, 0, %2, 0, 0, 0, implicit $mode, implicit $exec
 
     %7:vgpr_32 = V_MAX3_I32_e64 %0, %1, %2, implicit $exec
     %8:vgpr_32 = V_MAX3_I32_e64 %1, %0, %2, implicit $exec

diff  --git a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
index 4e44a9177de13..b498d876e3762 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-2-combine-multi-use.ll
@@ -3,7 +3,7 @@
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=tonga -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,SIVI,VI-FLUSH %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1010 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
-; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX10-DENORM %s
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX11-DENORM %s
 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx1100 -denormal-fp-math=preserve-sign -denormal-fp-math-f32=preserve-sign -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX10,GFX8_10,GFX10-FLUSH %s
 
 ; Make sure (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c)) doesn't
@@ -129,6 +129,7 @@ define amdgpu_kernel void @fmul_x2_xn3_f32(ptr addrspace(1) %out, float %x, floa
 ; VI-FLUSH:     v_mad_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 ; VI-DENORM:    v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 ; GFX10-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
+; GFX11-DENORM: v_fma_f16 v{{[0-9]+}}, -v{{[0-9]+}}, v{{[0-9]+}}, 1.0
 ; GFX10-FLUSH:  v_sub_f16_e32 v{{[0-9]+}}, 1.0, v{{[0-9]+}}
 define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
   %x = bitcast i16 %x.arg to half
@@ -155,6 +156,7 @@ define amdgpu_kernel void @multiple_fadd_use_test_f16(ptr addrspace(1) %out, i16
 ; VI-DENORM-DAG:    v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, v{{[0-9]+}}
 ; GFX10-FLUSH-DAG:  v_add_f16_e32 [[MAD:v[0-9]+]], s{{[0-9]+}}, [[MUL2]]
 ; GFX10-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], [[X]], 2.0, s{{[0-9]+}}
+; GFX11-DENORM-DAG: v_fmac_f16_e64 [[MAD:v[0-9]+]], [[X]], 2.0
 
 ; GCN-DAG: buffer_store_{{short|b16}} [[MUL2]]
 ; GCN-DAG: buffer_store_{{short|b16}} [[MAD]]
@@ -177,6 +179,7 @@ define amdgpu_kernel void @multiple_use_fadd_fmac_f16(ptr addrspace(1) %out, i16
 ; VI-DENORM-DAG:    v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, v{{[0-9]+}}
 ; GFX10-FLUSH-DAG:  v_add_f16_e32 [[MAD:v[0-9]+]], s{{[0-9]+}}, [[MUL2]]
 ; GFX10-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, s{{[0-9]+}}
+; GFX11-DENORM-DAG: v_fma_f16 [[MAD:v[0-9]+]], |[[X]]|, 2.0, s{{[0-9]+}}
 
 ; GCN-DAG: buffer_store_{{short|b16}} [[MUL2]]
 ; GCN-DAG: buffer_store_{{short|b16}} [[MAD]]
@@ -204,7 +207,9 @@ define amdgpu_kernel void @multiple_use_fadd_fmad_f16(ptr addrspace(1) %out, i16
 ; GFX10-FLUSH:  v_add_f16_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[MUL2]]
 ; GFX10-FLUSH:  v_add_f16_e32 {{v[0-9]+}}, {{s[0-9]+}}, [[MUL2]]
 ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, s{{[0-9]+}}
+; GFX11-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X:s[0-9]+]]|, 2.0, s{{[0-9]+}}
 ; GFX10-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, s{{[0-9]+}}
+; GFX11-DENORM: v_fma_f16 {{v[0-9]+}}, |[[X]]|, 2.0, s{{[0-9]+}}
 
 define amdgpu_kernel void @multiple_use_fadd_multi_fmad_f16(ptr addrspace(1) %out, i16 zeroext %x.arg, i16 zeroext %y.arg, i16 zeroext %z.arg) #0 {
   %x = bitcast i16 %x.arg to half

diff  --git a/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir
index df75667e986f9..dba62081a4139 100644
--- a/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/gfx11-twoaddr-fma.mir
@@ -17,12 +17,12 @@ body:             |
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
     ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
-    ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     %0 = IMPLICIT_DEF
     %1 = COPY %0.sub1
     %2 = COPY %0.sub0
     %3 = V_MOV_B32_e32 1078523331, implicit $exec
-    %4 = V_FMAC_F16_t16_e64 0, killed %2, 0, %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+    %4 = V_FMAC_F16_t16_e64 0, killed %2, 0, %3, 0, killed %1, 0, 0, 0, implicit $mode, implicit $exec
 
 ...
 
@@ -42,12 +42,12 @@ body:             |
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
     ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
-    ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, [[COPY1]], 0, killed [[V_MOV_B32_e32_]], 0, killed [[COPY]], 0, 0, 0, implicit $mode, implicit $exec
     %0 = IMPLICIT_DEF
     %1 = COPY %0.sub1
     %2 = COPY %0.sub0
     %3 = V_MOV_B32_e32 1078523331, implicit $exec
-    %4 = V_FMAC_F16_t16_e64 0, %2, 0, killed %3, 0, killed %1, 0, 0, implicit $mode, implicit $exec
+    %4 = V_FMAC_F16_t16_e64 0, %2, 0, killed %3, 0, killed %1, 0, 0, 0, implicit $mode, implicit $exec
 
 ...
 
@@ -67,12 +67,12 @@ body:             |
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
     ; GFX11-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
     ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1078523331, implicit $exec
-    ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, killed [[COPY]], 0, [[COPY1]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     %0 = IMPLICIT_DEF
     %1 = COPY %0.sub0
     %2 = COPY %0.sub1
     %3 = V_MOV_B32_e32 1078523331, implicit $exec
-    %4 = V_FMAC_F16_t16_e64 0, killed %1, 0, %2, 0, %3, 0, 0, implicit $mode, implicit $exec
+    %4 = V_FMAC_F16_t16_e64 0, killed %1, 0, %2, 0, %3, 0, 0, 0, implicit $mode, implicit $exec
 ...
 
 ---
@@ -89,12 +89,12 @@ body:             |
     ; GFX11-NEXT: {{  $}}
     ; GFX11-NEXT: [[COPY:%[0-9]+]]:vgpr_32 = COPY killed $vgpr0
     ; GFX11-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
-    ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, implicit $mode, implicit $exec
+    ; GFX11-NEXT: [[V_FMA_F16_gfx9_e64_:%[0-9]+]]:vgpr_32 = V_FMA_F16_gfx9_e64 0, 16384, 0, killed [[COPY]], 0, [[V_MOV_B32_e32_]], 0, 0, 0, implicit $mode, implicit $exec
     ; GFX11-NEXT: S_ENDPGM 0
     %0:vgpr_32 = COPY killed $vgpr0
 
     %1:vgpr_32 = V_MOV_B32_e32 49664, implicit $exec
-    %2:vgpr_32 = V_FMAC_F16_t16_e64 0, 16384, 0, killed %0, 0, %1, 0, 0, implicit $mode, implicit $exec
+    %2:vgpr_32 = V_FMAC_F16_t16_e64 0, 16384, 0, killed %0, 0, %1, 0, 0, 0, implicit $mode, implicit $exec
     S_ENDPGM 0
 
 ...

diff  --git a/llvm/test/CodeGen/AMDGPU/omod.ll b/llvm/test/CodeGen/AMDGPU/omod.ll
index fcc3d6ec73697..daae5a7e98e03 100644
--- a/llvm/test/CodeGen/AMDGPU/omod.ll
+++ b/llvm/test/CodeGen/AMDGPU/omod.ll
@@ -388,6 +388,33 @@ define amdgpu_ps void @v_omod_mul2_f32(float %a) #0 {
   ret void
 }
 
+define amdgpu_ps void @v_omod_mul2_med3(float %x, float %y, float %z) #0 {
+; SI-LABEL: v_omod_mul2_med3:
+; SI:       ; %bb.0:
+; SI-NEXT:    v_med3_f32 v0, v0, v1, v2 mul:2
+; SI-NEXT:    s_mov_b32 s3, 0xf000
+; SI-NEXT:    s_mov_b32 s2, -1
+; SI-NEXT:    buffer_store_dword v0, off, s[0:3], 0
+; SI-NEXT:    s_endpgm
+;
+; VI-LABEL: v_omod_mul2_med3:
+; VI:       ; %bb.0:
+; VI-NEXT:    v_med3_f32 v0, v0, v1, v2 mul:2
+; VI-NEXT:    flat_store_dword v[0:1], v0
+; VI-NEXT:    s_endpgm
+;
+; GFX11-LABEL: v_omod_mul2_med3:
+; GFX11:       ; %bb.0:
+; GFX11-NEXT:    v_med3_f32 v0, v0, v1, v2 mul:2
+; GFX11-NEXT:    global_store_b32 v[0:1], v0, off
+; GFX11-NEXT:    s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
+; GFX11-NEXT:    s_endpgm
+  %fmed3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
+  %div2 = fmul float %fmed3, 2.0
+  store float %div2, float addrspace(1)* undef
+  ret void
+}
+
 define amdgpu_ps void @v_omod_mul2_f64(double %a) #5 {
 ; SI-LABEL: v_omod_mul2_f64:
 ; SI:       ; %bb.0:

diff  --git a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir
index 0a30d60ae3124..825e4d8e2b31b 100644
--- a/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir
+++ b/llvm/test/CodeGen/AMDGPU/shrink-mad-fma.mir
@@ -258,7 +258,7 @@ body: |
     ; GFX11-NEXT: SI_RETURN implicit $vgpr2
     $vgpr0 = IMPLICIT_DEF
     $vgpr1 = IMPLICIT_DEF
-    $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    $vgpr2 = V_FMA_F16_gfx9_e64 0, 18688, 0, $vgpr0, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec
     SI_RETURN implicit $vgpr2
 ...
 
@@ -278,7 +278,7 @@ body: |
     ; GFX11-NEXT: SI_RETURN implicit $vgpr2
     $vgpr0 = IMPLICIT_DEF
     $vgpr1 = IMPLICIT_DEF
-    $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, implicit $mode, implicit $exec
+    $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, 18688, 0, $vgpr1, 0, 0, 0, implicit $mode, implicit $exec
     SI_RETURN implicit $vgpr2
 ...
 
@@ -298,7 +298,7 @@ body: |
     ; GFX11-NEXT: SI_RETURN implicit $vgpr2
     $vgpr0 = IMPLICIT_DEF
     $vgpr1 = IMPLICIT_DEF
-    $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec
+    $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec
     SI_RETURN implicit $vgpr2
 ...
 
@@ -318,6 +318,6 @@ body: |
     ; GFX11-NEXT: SI_RETURN implicit $vgpr2
     $vgpr0 = IMPLICIT_DEF
     $sgpr1 = IMPLICIT_DEF
-    $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, implicit $mode, implicit $exec
+    $vgpr2 = V_FMA_F16_gfx9_e64 0, $vgpr0, 0, $vgpr1, 0, 18688, 0, 0, 0, implicit $mode, implicit $exec
     SI_RETURN implicit $vgpr2
 ...

diff  --git a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
index 991ef34807e85..d236874de5669 100644
--- a/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
+++ b/llvm/test/MC/AMDGPU/gfx11_asm_vop3.s
@@ -1675,6 +1675,9 @@ v_div_fixup_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
 v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
 // GFX11: encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
+v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
+// GFX11: encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+
 v_div_fixup_f32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
 
@@ -2248,6 +2251,9 @@ v_fma_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
 v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
 // GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
+v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2
+// GFX11: encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+
 v_fma_f32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00]
 
@@ -3208,6 +3214,9 @@ v_max3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
 v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
 // GFX11: encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
+v_max3_f16 v5, v255, s2, s105 mul:2
+// GFX11: encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09]
+
 v_max3_f32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00]
 
@@ -3874,6 +3883,9 @@ v_med3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
 v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
 // GFX11: encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
+v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2
+// GFX11: encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b]
+
 v_med3_f32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00]
 
@@ -4144,6 +4156,9 @@ v_min3_f16 v5, -src_scc, |vcc_lo|, -1 op_sel:[0,0,1,0]
 v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp
 // GFX11: encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 
+v_min3_f16 v5, m0, 0.5, m0 clamp mul:4
+// GFX11: encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11]
+
 v_min3_f32 v5, v1, v2, s3
 // GFX11: encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00]
 

diff  --git a/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s b/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s
index d8fc1e8fce4d3..fab4042e70cdf 100644
--- a/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s
+++ b/llvm/test/MC/AMDGPU/gfx9_asm_vop3_e64.s
@@ -9282,6 +9282,9 @@ v_min3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1]
 v_min3_f16 v5, v1, v2, v3 clamp
 // CHECK: [0x05,0x80,0xf4,0xd1,0x01,0x05,0x0e,0x04]
 
+v_min3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2
+// CHECK: [0x05,0x78,0xf4,0xd1,0x01,0x05,0x0e,0x0c]
+
 v_min3_i16 v5, v1, v2, v3
 // CHECK: [0x05,0x00,0xf5,0xd1,0x01,0x05,0x0e,0x04]
 
@@ -9840,6 +9843,9 @@ v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1]
 v_max3_f16 v5, v1, v2, v3 clamp
 // CHECK: [0x05,0x80,0xf7,0xd1,0x01,0x05,0x0e,0x04]
 
+v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2
+// CHECK: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x0c]
+
 v_max3_i16 v5, v1, v2, v3
 // CHECK: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04]
 
@@ -10398,6 +10404,9 @@ v_med3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1]
 v_med3_f16 v5, v1, v2, v3 clamp
 // CHECK: [0x05,0x80,0xfa,0xd1,0x01,0x05,0x0e,0x04]
 
+v_med3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2
+// CHECK: [0x05,0x78,0xfa,0xd1,0x01,0x05,0x0e,0x0c]
+
 v_med3_i16 v5, v1, v2, v3
 // CHECK: [0x05,0x00,0xfb,0xd1,0x01,0x05,0x0e,0x04]
 
@@ -11982,6 +11991,9 @@ v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1]
 v_mad_f16 v5, v1, v2, v3 clamp
 // CHECK: [0x05,0x80,0x03,0xd2,0x01,0x05,0x0e,0x04]
 
+v_mad_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:4
+// CHECK: [0x05,0x78,0x03,0xd2,0x01,0x05,0x0e,0x14]
+
 v_mad_u16 v5, v1, v2, v3
 // CHECK: [0x05,0x00,0x04,0xd2,0x01,0x05,0x0e,0x04]
 
@@ -12546,6 +12558,9 @@ v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1]
 v_fma_f16 v5, v1, v2, v3 clamp
 // CHECK: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04]
 
+v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] clamp mul:2
+// CHECK: [0x05,0xf8,0x06,0xd2,0x01,0x05,0x0e,0x0c]
+
 v_div_fixup_f16 v5, v1, v2, v3
 // CHECK: [0x05,0x00,0x07,0xd2,0x01,0x05,0x0e,0x04]
 
@@ -12759,6 +12774,9 @@ v_div_fixup_f16 v5, v1, v2, v3 op_sel:[0,0,0,1]
 v_div_fixup_f16 v5, v1, v2, v3 op_sel:[1,1,1,1]
 // CHECK: [0x05,0x78,0x07,0xd2,0x01,0x05,0x0e,0x04]
 
+v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2
+// CHECK: [0x05,0x10,0x07,0xd2,0xf0,0xf8,0xc0,0x4b]
+
 v_div_fixup_f16 v5, v1, v2, v3 clamp
 // CHECK: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04]
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
index afbda0e321aa1..1db00580d789c 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx11_dasm_vop3.txt
@@ -1479,6 +1479,9 @@
 # GFX11: v_div_fixup_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 0xff,0xc3,0x54,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
 
+# CHECK: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2   ; encoding: [0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b]
+0x05,0x10,0x54,0xd6,0xf0,0xfa,0xc0,0x4b
+
 # GFX11: v_div_fixup_f32 v5, v1, v2, s3          ; encoding: [0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x27,0xd6,0x01,0x05,0x0e,0x00
 
@@ -1920,6 +1923,9 @@
 # GFX11: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
 
+# CHECK: v_fma_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp div:2    ; encoding: [0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00]
+0xff,0xc3,0x48,0xd6,0xff,0xd6,0xf0,0x79,0x0b,0xfe,0x00,0x00
+
 # GFX11: v_fma_f32 v5, v1, v2, s3                ; encoding: [0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x13,0xd6,0x01,0x05,0x0e,0x00
 
@@ -2803,6 +2809,9 @@
 # GFX11: v_max3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 0xff,0xc3,0x4c,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
 
+# CHECK: v_max3_f16 v5, v255, s2, s105 mul:2     ; encoding: [0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09]
+0x05,0x00,0x4c,0xd6,0xff,0x05,0xa4,0x09
+
 # GFX11: v_max3_f32 v5, v1, v2, s3               ; encoding: [0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x1c,0xd6,0x01,0x05,0x0e,0x00
 
@@ -3469,6 +3478,9 @@
 # GFX11: v_med3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 0xff,0xc3,0x4f,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
 
+# CHECK: v_med3_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] div:2    ; encoding: [0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b]
+0x05,0x10,0x4f,0xd6,0xf0,0xfa,0xc0,0x5b
+
 # GFX11: v_med3_f32 v5, v1, v2, s3               ; encoding: [0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x1f,0xd6,0x01,0x05,0x0e,0x00
 
@@ -3739,6 +3751,9 @@
 # GFX11: v_min3_f16 v255, -|0xfe0b|, -|vcc_hi|, null op_sel:[0,0,0,1] clamp ; encoding: [0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00]
 0xff,0xc3,0x49,0xd6,0xff,0xd6,0xf0,0x61,0x0b,0xfe,0x00,0x00
 
+# CHECK: v_min3_f16 v5, m0, 0.5, m0 clamp mul:4  ; encoding: [0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11]
+0x05,0x80,0x49,0xd6,0x7d,0xe0,0xf5,0x11
+
 # GFX11: v_min3_f32 v5, v1, v2, s3               ; encoding: [0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00]
 0x05,0x00,0x19,0xd6,0x01,0x05,0x0e,0x00
 

diff  --git a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
index af8e0df1dee14..e3ed9778f6fb4 100644
--- a/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
+++ b/llvm/test/MC/Disassembler/AMDGPU/gfx9_vop3.txt
@@ -16230,6 +16230,9 @@
 # CHECK: v_min3_f16 v5, v1, v2, v3 clamp         ; encoding: [0x05,0x80,0xf4,0xd1,0x01,0x05,0x0e,0x04]
 0x05,0x80,0xf4,0xd1,0x01,0x05,0x0e,0x04
 
+# CHECK: v_min3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 ; encoding: [0x05,0x78,0xf4,0xd1,0x01,0x05,0x0e,0x0c]
+0x05,0x78,0xf4,0xd1,0x01,0x05,0x0e,0x0c
+
 # CHECK: v_min3_i16 v5, v1, v2, v3               ; encoding: [0x05,0x00,0xf5,0xd1,0x01,0x05,0x0e,0x04]
 0x05,0x00,0xf5,0xd1,0x01,0x05,0x0e,0x04
 
@@ -16698,6 +16701,9 @@
 # CHECK: v_max3_f16 v5, v1, v2, v3 clamp         ; encoding: [0x05,0x80,0xf7,0xd1,0x01,0x05,0x0e,0x04]
 0x05,0x80,0xf7,0xd1,0x01,0x05,0x0e,0x04
 
+# CHECK: v_max3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 ; encoding: [0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x0c]
+0x05,0x78,0xf7,0xd1,0x01,0x05,0x0e,0x0c
+
 # CHECK: v_max3_i16 v5, v1, v2, v3               ; encoding: [0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04]
 0x05,0x00,0xf8,0xd1,0x01,0x05,0x0e,0x04
 
@@ -17166,6 +17172,9 @@
 # CHECK: v_med3_f16 v5, v1, v2, v3 clamp         ; encoding: [0x05,0x80,0xfa,0xd1,0x01,0x05,0x0e,0x04]
 0x05,0x80,0xfa,0xd1,0x01,0x05,0x0e,0x04
 
+# CHECK: v_med3_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] mul:2 ; encoding: [0x05,0x78,0xfa,0xd1,0x01,0x05,0x0e,0x0c]
+0x05,0x78,0xfa,0xd1,0x01,0x05,0x0e,0x0c
+
 # CHECK: v_med3_i16 v5, v1, v2, v3               ; encoding: [0x05,0x00,0xfb,0xd1,0x01,0x05,0x0e,0x04]
 0x05,0x00,0xfb,0xd1,0x01,0x05,0x0e,0x04
 
@@ -19074,6 +19083,9 @@
 # CHECK: v_div_fixup_f16 v5, v1, v2, v3 clamp    ; encoding: [0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04]
 0x05,0x80,0x07,0xd2,0x01,0x05,0x0e,0x04
 
+# CHECK: v_div_fixup_f16 v5, 0.5, -m0, 0.5 op_sel:[0,1,0,0] mul:2 ; encoding: [0x05,0x10,0x07,0xd2,0xf0,0xf8,0xc0,0x4b]
+0x05,0x10,0x07,0xd2,0xf0,0xf8,0xc0,0x4b
+
 # CHECK: v_interp_p1ll_f16 v5, v2, attr0.x       ; encoding: [0x05,0x00,0x74,0xd2,0x00,0x04,0x02,0x00]
 0x05,0x00,0x74,0xd2,0x00,0x04,0x02,0x00
 
@@ -22206,6 +22218,9 @@
 # CHECK: v_fma_f16 v5, v1, v2, v3 clamp          ; encoding: [0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04]
 0x05,0x80,0x06,0xd2,0x01,0x05,0x0e,0x04
 
+# CHECK: v_fma_f16 v5, v1, v2, v3 op_sel:[1,1,1,1] clamp mul:2 ; encoding: [0x05,0xf8,0x06,0xd2,0x01,0x05,0x0e,0x0c]
+0x05,0xf8,0x06,0xd2,0x01,0x05,0x0e,0x0c
+
 # CHECK: v_fma_legacy_f16 v5, v1, v2, v3         ; encoding: [0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04]
 0x05,0x00,0xee,0xd1,0x01,0x05,0x0e,0x04
 


        


More information about the llvm-commits mailing list