[llvm] [AMDGPU][CodeGen] Fold immediates in src1 operands of V_MAD/MAC/FMA/FMAC. (PR #68002)

Ivan Kosarev via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 5 04:08:24 PDT 2023


https://github.com/kosarev updated https://github.com/llvm/llvm-project/pull/68002

>From 12165f507c8359be7e7ebb501e5eb8ade4005c95 Mon Sep 17 00:00:00 2001
From: Ivan Kosarev <ivan.kosarev at amd.com>
Date: Mon, 2 Oct 2023 17:14:23 +0100
Subject: [PATCH] [AMDGPU][CodeGen] Fold immediates in src1 operands of
 V_MAD/MAC/FMA/FMAC.

---
 llvm/lib/Target/AMDGPU/SIInstrInfo.cpp        |  25 +-
 .../AMDGPU/amdgpu-codegenprepare-idiv.ll      |  52 ++--
 llvm/test/CodeGen/AMDGPU/bypass-div.ll        | 226 +++++++++---------
 .../test/CodeGen/AMDGPU/carryout-selection.ll |  28 +--
 .../CodeGen/AMDGPU/dagcombine-fma-fmad.ll     |  79 +++---
 .../AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll        |   2 +-
 llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll  |  24 +-
 llvm/test/CodeGen/AMDGPU/llvm.log.ll          |  73 +++---
 llvm/test/CodeGen/AMDGPU/llvm.log10.ll        |  73 +++---
 llvm/test/CodeGen/AMDGPU/madmk.ll             |   9 +-
 llvm/test/CodeGen/AMDGPU/operand-folding.ll   |   4 +-
 llvm/test/CodeGen/AMDGPU/sdiv64.ll            |  20 +-
 llvm/test/CodeGen/AMDGPU/srem64.ll            |  24 +-
 llvm/test/CodeGen/AMDGPU/udiv.ll              |   6 +-
 llvm/test/CodeGen/AMDGPU/udiv64.ll            |  22 +-
 llvm/test/CodeGen/AMDGPU/urem64.ll            |  18 +-
 llvm/test/CodeGen/AMDGPU/v_mac.ll             |   2 +-
 llvm/test/CodeGen/AMDGPU/wave32.ll            |   8 +-
 18 files changed, 339 insertions(+), 356 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 1b66f5d06ea2618..792f4695d288b5f 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3250,9 +3250,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
     MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
 
     // Multiplied part is the constant: Use v_madmk_{f16, f32}.
-    // We should only expect these to be on src0 due to canonicalization.
-    if (Src0->isReg() && Src0->getReg() == Reg) {
-      if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
+    if ((Src0->isReg() && Src0->getReg() == Reg) ||
+        (Src1->isReg() && Src1->getReg() == Reg)) {
+      MachineOperand *RegSrc =
+          Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
+      if (!RegSrc->isReg() ||
+          RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())))
         return false;
 
       if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
@@ -3266,18 +3269,22 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
       if (pseudoToMCOpcode(NewOpc) == -1)
         return false;
 
-      // We need to swap operands 0 and 1 since madmk constant is at operand 1.
+      // V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
+      // would also require restricting their register classes. For now
+      // just bail out.
+      if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
+        return false;
 
       const int64_t Imm = ImmOp->getImm();
 
       // FIXME: This would be a lot easier if we could return a new instruction
       // instead of having to modify in place.
 
-      Register Src1Reg = Src1->getReg();
-      unsigned Src1SubReg = Src1->getSubReg();
-      Src0->setReg(Src1Reg);
-      Src0->setSubReg(Src1SubReg);
-      Src0->setIsKill(Src1->isKill());
+      Register SrcReg = RegSrc->getReg();
+      unsigned SrcSubReg = RegSrc->getSubReg();
+      Src0->setReg(SrcReg);
+      Src0->setSubReg(SrcSubReg);
+      Src0->setIsKill(RegSrc->isKill());
 
       if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
           Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index 1b8216f4aa2a61f..c793f9ee682f8c4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -7149,7 +7149,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    s_movk_i32 s8, 0x11f
@@ -7269,7 +7269,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
@@ -7533,21 +7533,21 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, <
 ; GFX6-NEXT:    v_madak_f32 v0, 0, v0, 0x457ff000
 ; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX6-NEXT:    s_movk_i32 s6, 0xf001
-; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
 ; GFX6-NEXT:    s_movk_i32 s8, 0xfff
+; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 12
-; GFX6-NEXT:    s_mov_b32 s7, 0xf000
+; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
 ; GFX6-NEXT:    v_mul_hi_u32 v2, v0, s6
 ; GFX6-NEXT:    v_mul_lo_u32 v4, v1, s6
 ; GFX6-NEXT:    v_mul_lo_u32 v3, v0, s6
+; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
+; GFX6-NEXT:    s_lshr_b64 s[0:1], s[0:1], 12
 ; GFX6-NEXT:    v_sub_i32_e32 v2, vcc, v2, v0
 ; GFX6-NEXT:    v_add_i32_e32 v2, vcc, v2, v4
 ; GFX6-NEXT:    v_mul_hi_u32 v5, v0, v3
@@ -7647,7 +7647,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, <
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7834,7 +7834,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -7954,7 +7954,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_readfirstlane_b32 s0, v0
@@ -8283,7 +8283,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -8399,7 +8399,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
@@ -8589,14 +8589,14 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX6-NEXT:    s_sub_u32 s4, 0, s10
 ; GFX6-NEXT:    s_subb_u32 s5, 0, s11
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_ashr_i32 s12, s3, 31
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    s_add_u32 s2, s2, s12
@@ -8724,13 +8724,13 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_sub_u32 s0, 0, s8
 ; GFX9-NEXT:    s_subb_u32 s1, 0, s9
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GFX9-NEXT:    v_rcp_f32_e32 v1, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
-; GFX9-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v2
+; GFX9-NEXT:    v_madmk_f32 v1, v2, 0xcf800000, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_readfirstlane_b32 s10, v2
@@ -8944,14 +8944,14 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
 ; GFX6-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; GFX6-NEXT:    v_mac_f32_e32 v0, 0, v1
 ; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
-; GFX6-NEXT:    s_movk_i32 s6, 0xf001
 ; GFX6-NEXT:    s_load_dwordx2 s[4:5], s[0:1], 0x9
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT:    s_movk_i32 s6, 0xf001
 ; GFX6-NEXT:    s_mov_b32 s7, 0xf000
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9073,7 +9073,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
-; GFX9-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v2
+; GFX9-NEXT:    v_madmk_f32 v1, v2, 0xcf800000, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
@@ -9789,7 +9789,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    s_mov_b32 s2, -1
@@ -9903,7 +9903,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_readfirstlane_b32 s4, v1
@@ -10093,14 +10093,14 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX6-NEXT:    s_sub_u32 s4, 0, s8
 ; GFX6-NEXT:    s_subb_u32 s5, 0, s9
 ; GFX6-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GFX6-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX6-NEXT:    s_waitcnt lgkmcnt(0)
 ; GFX6-NEXT:    s_ashr_i32 s10, s3, 31
 ; GFX6-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX6-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX6-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX6-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX6-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX6-NEXT:    s_add_u32 s2, s2, s10
@@ -10226,13 +10226,13 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
 ; GFX9-NEXT:    s_load_dwordx4 s[4:7], s[0:1], 0x24
 ; GFX9-NEXT:    s_sub_u32 s0, 0, s8
 ; GFX9-NEXT:    s_subb_u32 s1, 0, s9
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GFX9-NEXT:    v_rcp_f32_e32 v1, v0
 ; GFX9-NEXT:    v_mov_b32_e32 v0, 0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
 ; GFX9-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
 ; GFX9-NEXT:    v_trunc_f32_e32 v2, v2
-; GFX9-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v2
+; GFX9-NEXT:    v_madmk_f32 v1, v2, 0xcf800000, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_readfirstlane_b32 s2, v2
diff --git a/llvm/test/CodeGen/AMDGPU/bypass-div.ll b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
index 2184478635e0e38..cb1b664549c9a30 100644
--- a/llvm/test/CodeGen/AMDGPU/bypass-div.ll
+++ b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
@@ -25,12 +25,12 @@ define i64 @sdiv64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v10
 ; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, 0, v11
 ; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
-; GFX9-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; GFX9-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
 ; GFX9-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GFX9-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX9-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; GFX9-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v12, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v4, v8, v6
@@ -171,12 +171,12 @@ define i64 @udiv64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; GFX9-NEXT:    v_sub_co_u32_e32 v10, vcc, 0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GFX9-NEXT:    v_madmk_f32 v4, v5, 0x4f800000, v4
 ; GFX9-NEXT:    v_rcp_f32_e32 v4, v4
 ; GFX9-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GFX9-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX9-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GFX9-NEXT:    v_madmk_f32 v4, v5, 0xcf800000, v4
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v5
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v4
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v8
@@ -312,12 +312,12 @@ define i64 @srem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v9
 ; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, 0, v10
 ; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v9, vcc
-; GFX9-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; GFX9-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
 ; GFX9-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GFX9-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
-; GFX9-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; GFX9-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v2
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v11, v3
 ; GFX9-NEXT:    v_mul_lo_u32 v4, v8, v6
@@ -454,12 +454,12 @@ define i64 @urem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; GFX9-NEXT:    v_sub_co_u32_e32 v10, vcc, 0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GFX9-NEXT:    v_madmk_f32 v4, v5, 0x4f800000, v4
 ; GFX9-NEXT:    v_rcp_f32_e32 v4, v4
 ; GFX9-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GFX9-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX9-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GFX9-NEXT:    v_madmk_f32 v4, v5, 0xcf800000, v4
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v5
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v4
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v8
@@ -709,118 +709,118 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    s_xor_b64 s[10:11], exec, s[4:5]
 ; GFX9-NEXT:    s_cbranch_execz .LBB8_2
 ; GFX9-NEXT:  ; %bb.1:
-; GFX9-NEXT:    v_ashrrev_i32_e32 v11, 31, v3
-; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v2, v11
-; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v3, v11, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v11
-; GFX9-NEXT:    v_xor_b32_e32 v3, v4, v11
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v4, v3
-; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, v2
-; GFX9-NEXT:    v_sub_co_u32_e32 v9, vcc, 0, v3
-; GFX9-NEXT:    v_subb_co_u32_e32 v10, vcc, 0, v2, vcc
-; GFX9-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
-; GFX9-NEXT:    v_rcp_f32_e32 v4, v4
-; GFX9-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GFX9-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
-; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX9-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v4
-; GFX9-NEXT:    v_cvt_u32_f32_e32 v12, v5
-; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v8
-; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v9, v8, 0
-; GFX9-NEXT:    v_mul_lo_u32 v7, v9, v12
-; GFX9-NEXT:    v_mul_hi_u32 v13, v8, v4
-; GFX9-NEXT:    v_add3_u32 v7, v5, v7, v6
-; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v8, v7, 0
-; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v5
-; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v12, v4, 0
-; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v6, vcc
-; GFX9-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v12, v7, 0
-; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v13, v4
-; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v14, v5, vcc
-; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v7, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v6
-; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v8, v4
-; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v12, v5, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v6, v9, v12
-; GFX9-NEXT:    v_mul_lo_u32 v7, v10, v13
-; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v9, v13, 0
-; GFX9-NEXT:    v_add3_u32 v7, v5, v6, v7
-; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v12, v7, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v13, v7, 0
-; GFX9-NEXT:    v_mul_hi_u32 v14, v13, v4
-; GFX9-NEXT:    v_mad_u64_u32 v[9:10], s[4:5], v12, v4, 0
-; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v14, v7
-; GFX9-NEXT:    v_addc_co_u32_e32 v7, vcc, 0, v8, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v9
-; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v7, v10, vcc
-; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v4, v5
+; GFX9-NEXT:    v_ashrrev_i32_e32 v9, 31, v3
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v9
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v3, v9, vcc
+; GFX9-NEXT:    v_xor_b32_e32 v10, v3, v9
+; GFX9-NEXT:    v_xor_b32_e32 v11, v2, v9
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v2, v11
+; GFX9-NEXT:    v_cvt_f32_u32_e32 v3, v10
+; GFX9-NEXT:    v_sub_co_u32_e32 v7, vcc, 0, v11
+; GFX9-NEXT:    v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
+; GFX9-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
+; GFX9-NEXT:    v_rcp_f32_e32 v2, v2
+; GFX9-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; GFX9-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
+; GFX9-NEXT:    v_trunc_f32_e32 v3, v3
+; GFX9-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v6, v2
+; GFX9-NEXT:    v_cvt_u32_f32_e32 v12, v3
+; GFX9-NEXT:    v_mul_lo_u32 v4, v8, v6
+; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
+; GFX9-NEXT:    v_mul_lo_u32 v5, v7, v12
+; GFX9-NEXT:    v_mul_hi_u32 v13, v6, v2
+; GFX9-NEXT:    v_add3_u32 v5, v3, v5, v4
+; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
+; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v13, v3
+; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0
+; GFX9-NEXT:    v_addc_co_u32_e32 v14, vcc, 0, v4, vcc
+; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v13, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v14, v3, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v5, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v4
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v13, vcc, v6, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v12, vcc, v12, v3, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v4, v7, v12
+; GFX9-NEXT:    v_mul_lo_u32 v5, v8, v13
+; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0
+; GFX9-NEXT:    v_add3_u32 v5, v3, v4, v5
+; GFX9-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0
+; GFX9-NEXT:    v_mul_hi_u32 v14, v13, v2
+; GFX9-NEXT:    v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v14, v5
 ; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v13, v4
-; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, v12, v5, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v7
+; GFX9-NEXT:    v_addc_co_u32_e32 v2, vcc, v5, v8, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v13, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, v12, v3, vcc
 ; GFX9-NEXT:    v_ashrrev_i32_e32 v7, 31, v1
 ; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v7
-; GFX9-NEXT:    v_xor_b32_e32 v8, v0, v7
-; GFX9-NEXT:    v_addc_co_u32_e32 v6, vcc, v1, v7, vcc
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v8, v5, 0
-; GFX9-NEXT:    v_mul_hi_u32 v9, v8, v4
-; GFX9-NEXT:    v_xor_b32_e32 v6, v6, v7
-; GFX9-NEXT:    v_add_co_u32_e32 v9, vcc, v9, v0
-; GFX9-NEXT:    v_addc_co_u32_e32 v10, vcc, 0, v1, vcc
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v6, v4, 0
-; GFX9-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v6, v5, 0
-; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v9, v0
-; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v10, v1, vcc
-; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v5, vcc
-; GFX9-NEXT:    v_add_co_u32_e32 v4, vcc, v0, v4
-; GFX9-NEXT:    v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
-; GFX9-NEXT:    v_mul_lo_u32 v9, v2, v4
-; GFX9-NEXT:    v_mul_lo_u32 v10, v3, v5
-; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v3, v4, 0
-; GFX9-NEXT:    v_add3_u32 v1, v1, v10, v9
-; GFX9-NEXT:    v_sub_u32_e32 v9, v6, v1
-; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v8, v0
-; GFX9-NEXT:    v_subb_co_u32_e64 v8, s[4:5], v9, v2, vcc
-; GFX9-NEXT:    v_sub_co_u32_e64 v9, s[4:5], v0, v3
-; GFX9-NEXT:    v_subbrev_co_u32_e64 v10, s[6:7], 0, v8, s[4:5]
-; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v10, v2
-; GFX9-NEXT:    v_cndmask_b32_e64 v12, 0, -1, s[6:7]
-; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v9, v3
+; GFX9-NEXT:    v_xor_b32_e32 v5, v0, v7
+; GFX9-NEXT:    v_addc_co_u32_e32 v4, vcc, v1, v7, vcc
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0
+; GFX9-NEXT:    v_mul_hi_u32 v6, v5, v2
+; GFX9-NEXT:    v_xor_b32_e32 v4, v4, v7
+; GFX9-NEXT:    v_add_co_u32_e32 v6, vcc, v6, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v8, vcc, 0, v1, vcc
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
+; GFX9-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
+; GFX9-NEXT:    v_add_co_u32_e32 v0, vcc, v6, v0
+; GFX9-NEXT:    v_addc_co_u32_e32 v0, vcc, v8, v1, vcc
+; GFX9-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-NEXT:    v_add_co_u32_e32 v2, vcc, v0, v2
+; GFX9-NEXT:    v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v2
+; GFX9-NEXT:    v_mul_lo_u32 v8, v11, v3
+; GFX9-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0
+; GFX9-NEXT:    v_add3_u32 v1, v1, v8, v6
+; GFX9-NEXT:    v_sub_u32_e32 v6, v4, v1
+; GFX9-NEXT:    v_sub_co_u32_e32 v0, vcc, v5, v0
+; GFX9-NEXT:    v_subb_co_u32_e64 v6, s[4:5], v6, v10, vcc
+; GFX9-NEXT:    v_sub_co_u32_e64 v8, s[4:5], v0, v11
+; GFX9-NEXT:    v_subbrev_co_u32_e64 v12, s[6:7], 0, v6, s[4:5]
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v12, v10
+; GFX9-NEXT:    v_cndmask_b32_e64 v5, 0, -1, s[6:7]
+; GFX9-NEXT:    v_cmp_ge_u32_e64 s[6:7], v8, v11
 ; GFX9-NEXT:    v_cndmask_b32_e64 v13, 0, -1, s[6:7]
-; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], v10, v2
-; GFX9-NEXT:    v_cndmask_b32_e64 v12, v12, v13, s[6:7]
-; GFX9-NEXT:    v_add_co_u32_e64 v13, s[6:7], 2, v4
-; GFX9-NEXT:    v_addc_co_u32_e64 v14, s[6:7], 0, v5, s[6:7]
-; GFX9-NEXT:    v_add_co_u32_e64 v15, s[6:7], 1, v4
-; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v6, v1, vcc
-; GFX9-NEXT:    v_addc_co_u32_e64 v16, s[6:7], 0, v5, s[6:7]
-; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v2
-; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v12
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, 0, -1, vcc
-; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v3
-; GFX9-NEXT:    v_cndmask_b32_e64 v12, v16, v14, s[6:7]
+; GFX9-NEXT:    v_cmp_eq_u32_e64 s[6:7], v12, v10
+; GFX9-NEXT:    v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX9-NEXT:    v_add_co_u32_e64 v13, s[6:7], 2, v2
+; GFX9-NEXT:    v_addc_co_u32_e64 v14, s[6:7], 0, v3, s[6:7]
+; GFX9-NEXT:    v_add_co_u32_e64 v15, s[6:7], 1, v2
+; GFX9-NEXT:    v_subb_co_u32_e32 v1, vcc, v4, v1, vcc
+; GFX9-NEXT:    v_addc_co_u32_e64 v16, s[6:7], 0, v3, s[6:7]
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v1, v10
+; GFX9-NEXT:    v_cmp_ne_u32_e64 s[6:7], 0, v5
+; GFX9-NEXT:    v_cndmask_b32_e64 v4, 0, -1, vcc
+; GFX9-NEXT:    v_cmp_ge_u32_e32 vcc, v0, v11
+; GFX9-NEXT:    v_cndmask_b32_e64 v5, v16, v14, s[6:7]
 ; GFX9-NEXT:    v_cndmask_b32_e64 v14, 0, -1, vcc
-; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT:    v_subb_co_u32_e64 v2, s[4:5], v8, v2, s[4:5]
-; GFX9-NEXT:    v_sub_co_u32_e64 v3, s[4:5], v9, v3
-; GFX9-NEXT:    v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX9-NEXT:    v_cmp_eq_u32_e32 vcc, v1, v10
+; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v14, vcc
+; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v4
+; GFX9-NEXT:    v_cndmask_b32_e64 v4, v15, v13, s[6:7]
+; GFX9-NEXT:    v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX9-NEXT:    v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT:    v_xor_b32_e32 v5, v7, v9
+; GFX9-NEXT:    v_xor_b32_e32 v2, v2, v5
+; GFX9-NEXT:    v_xor_b32_e32 v3, v3, v5
+; GFX9-NEXT:    v_sub_co_u32_e64 v4, s[8:9], v2, v5
+; GFX9-NEXT:    v_subb_co_u32_e64 v2, s[4:5], v6, v10, s[4:5]
+; GFX9-NEXT:    v_subb_co_u32_e64 v5, s[8:9], v3, v5, s[8:9]
+; GFX9-NEXT:    v_sub_co_u32_e64 v3, s[4:5], v8, v11
 ; GFX9-NEXT:    v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5]
-; GFX9-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v6
-; GFX9-NEXT:    v_cndmask_b32_e64 v6, v15, v13, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v10, v2, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v6, v7, v11
+; GFX9-NEXT:    v_cndmask_b32_e64 v2, v12, v2, s[6:7]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX9-NEXT:    v_cndmask_b32_e64 v2, v9, v3, s[6:7]
-; GFX9-NEXT:    v_cndmask_b32_e32 v5, v5, v12, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v4, v4, v6
+; GFX9-NEXT:    v_cndmask_b32_e64 v2, v8, v3, s[6:7]
 ; GFX9-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT:    v_xor_b32_e32 v5, v5, v6
-; GFX9-NEXT:    v_sub_co_u32_e64 v4, s[8:9], v4, v6
 ; GFX9-NEXT:    v_xor_b32_e32 v0, v0, v7
-; GFX9-NEXT:    v_subb_co_u32_e64 v5, s[8:9], v5, v6, s[8:9]
 ; GFX9-NEXT:    v_xor_b32_e32 v1, v1, v7
 ; GFX9-NEXT:    v_sub_co_u32_e32 v6, vcc, v0, v7
 ; GFX9-NEXT:    v_subb_co_u32_e32 v7, vcc, v1, v7, vcc
@@ -884,12 +884,12 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; GFX9-NEXT:    v_sub_co_u32_e32 v10, vcc, 0, v2
 ; GFX9-NEXT:    v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
-; GFX9-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GFX9-NEXT:    v_madmk_f32 v4, v5, 0x4f800000, v4
 ; GFX9-NEXT:    v_rcp_f32_e32 v4, v4
 ; GFX9-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GFX9-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GFX9-NEXT:    v_trunc_f32_e32 v5, v5
-; GFX9-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GFX9-NEXT:    v_madmk_f32 v4, v5, 0xcf800000, v4
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v8, v5
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v9, v4
 ; GFX9-NEXT:    v_mul_lo_u32 v6, v10, v8
diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
index a9a6075516f6c3e..50693a92bc92cb9 100644
--- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
@@ -1804,12 +1804,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; CISI-NEXT:    v_cvt_f32_u32_e32 v1, s3
 ; CISI-NEXT:    s_sub_u32 s0, 0, s2
 ; CISI-NEXT:    s_subb_u32 s1, 0, s3
-; CISI-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CISI-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; CISI-NEXT:    v_rcp_f32_e32 v0, v0
 ; CISI-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; CISI-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; CISI-NEXT:    v_trunc_f32_e32 v1, v1
-; CISI-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; CISI-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; CISI-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; CISI-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; CISI-NEXT:    v_mul_lo_u32 v2, s0, v1
@@ -1954,12 +1954,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; VI-NEXT:    v_cvt_f32_u32_e32 v1, s3
 ; VI-NEXT:    s_sub_u32 s8, 0, s2
 ; VI-NEXT:    s_subb_u32 s9, 0, s3
-; VI-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; VI-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; VI-NEXT:    v_rcp_f32_e32 v0, v0
 ; VI-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; VI-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; VI-NEXT:    v_trunc_f32_e32 v1, v1
-; VI-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; VI-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; VI-NEXT:    v_cvt_u32_f32_e32 v4, v1
 ; VI-NEXT:    v_cvt_u32_f32_e32 v5, v0
 ; VI-NEXT:    v_mul_lo_u32 v2, s8, v4
@@ -2111,12 +2111,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX9-NEXT:    v_cvt_f32_u32_e32 v1, s3
 ; GFX9-NEXT:    s_sub_u32 s0, 0, s2
 ; GFX9-NEXT:    s_subb_u32 s1, 0, s3
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GFX9-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX9-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX9-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX9-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX9-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX9-NEXT:    v_readfirstlane_b32 s10, v1
@@ -2279,12 +2279,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1010-NEXT:    v_cvt_f32_u32_e32 v1, s3
 ; GFX1010-NEXT:    s_sub_u32 s9, 0, s2
 ; GFX1010-NEXT:    s_subb_u32 s10, 0, s3
-; GFX1010-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX1010-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GFX1010-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX1010-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX1010-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX1010-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX1010-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX1010-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX1010-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX1010-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX1010-NEXT:    v_readfirstlane_b32 s0, v1
@@ -2441,12 +2441,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1030W32-NEXT:    v_cvt_f32_u32_e32 v1, s3
 ; GFX1030W32-NEXT:    s_sub_u32 s9, 0, s2
 ; GFX1030W32-NEXT:    s_subb_u32 s10, 0, s3
-; GFX1030W32-NEXT:    v_fmac_f32_e32 v0, 0x4f800000, v1
+; GFX1030W32-NEXT:    v_fmamk_f32 v0, v1, 0x4f800000, v0
 ; GFX1030W32-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX1030W32-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX1030W32-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX1030W32-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX1030W32-NEXT:    v_fmac_f32_e32 v0, 0xcf800000, v1
+; GFX1030W32-NEXT:    v_fmamk_f32 v0, v1, 0xcf800000, v0
 ; GFX1030W32-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX1030W32-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX1030W32-NEXT:    v_readfirstlane_b32 s0, v1
@@ -2603,12 +2603,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX1030W64-NEXT:    v_cvt_f32_u32_e32 v1, s3
 ; GFX1030W64-NEXT:    s_sub_u32 s9, 0, s2
 ; GFX1030W64-NEXT:    s_subb_u32 s10, 0, s3
-; GFX1030W64-NEXT:    v_fmac_f32_e32 v0, 0x4f800000, v1
+; GFX1030W64-NEXT:    v_fmamk_f32 v0, v1, 0x4f800000, v0
 ; GFX1030W64-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX1030W64-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX1030W64-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX1030W64-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX1030W64-NEXT:    v_fmac_f32_e32 v0, 0xcf800000, v1
+; GFX1030W64-NEXT:    v_fmamk_f32 v0, v1, 0xcf800000, v0
 ; GFX1030W64-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX1030W64-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX1030W64-NEXT:    v_readfirstlane_b32 s8, v1
@@ -2766,7 +2766,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX11-NEXT:    s_sub_u32 s9, 0, s2
 ; GFX11-NEXT:    s_subb_u32 s10, 0, s3
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fmac_f32_e32 v0, 0x4f800000, v1
+; GFX11-NEXT:    v_fmamk_f32 v0, v1, 0x4f800000, v0
 ; GFX11-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX11-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -2774,7 +2774,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GFX11-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX11-NEXT:    v_trunc_f32_e32 v1, v1
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_fmac_f32_e32 v0, 0xcf800000, v1
+; GFX11-NEXT:    v_fmamk_f32 v0, v1, 0xcf800000, v0
 ; GFX11-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX11-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index fe649d433304178..9fb0cab068d2862 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -7,6 +7,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GFX10:       ; %bb.0: ; %.entry
 ; GFX10-NEXT:    image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
 ; GFX10-NEXT:    v_mov_b32_e32 v4, 0
+; GFX10-NEXT:    v_mov_b32_e32 v7, 0x3ca3d70a
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-NEXT:    s_clause 0x1
 ; GFX10-NEXT:    image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
@@ -36,34 +37,33 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GFX10-NEXT:    v_fma_f32 v1, v1, v5, s28
 ; GFX10-NEXT:    v_max_f32_e64 v6, s0, s0 clamp
 ; GFX10-NEXT:    v_add_f32_e64 v5, s29, -1.0
-; GFX10-NEXT:    v_sub_f32_e32 v8, s0, v1
-; GFX10-NEXT:    v_fma_f32 v7, -s2, v6, s6
+; GFX10-NEXT:    v_sub_f32_e32 v9, s0, v1
+; GFX10-NEXT:    v_fma_f32 v8, -s2, v6, s6
 ; GFX10-NEXT:    v_fma_f32 v5, v6, v5, 1.0
-; GFX10-NEXT:    v_mad_f32 v10, s2, v6, v2
-; GFX10-NEXT:    s_mov_b32 s0, 0x3c23d70a
-; GFX10-NEXT:    v_fmac_f32_e32 v1, v6, v8
-; GFX10-NEXT:    v_fmac_f32_e32 v10, v7, v6
+; GFX10-NEXT:    v_mad_f32 v11, s2, v6, v2
+; GFX10-NEXT:    v_fmac_f32_e32 v1, v6, v9
+; GFX10-NEXT:    v_fmac_f32_e32 v11, v8, v6
 ; GFX10-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX10-NEXT:    v_mul_f32_e32 v9, s10, v0
+; GFX10-NEXT:    v_mul_f32_e32 v10, s10, v0
 ; GFX10-NEXT:    v_fma_f32 v0, -v0, s10, s14
-; GFX10-NEXT:    v_mul_f32_e32 v8, s18, v2
+; GFX10-NEXT:    v_mul_f32_e32 v9, s18, v2
 ; GFX10-NEXT:    v_mul_f32_e32 v3, s22, v3
-; GFX10-NEXT:    v_fmac_f32_e32 v9, v0, v6
+; GFX10-NEXT:    v_fmac_f32_e32 v10, v0, v6
 ; GFX10-NEXT:    v_sub_f32_e32 v0, v1, v5
-; GFX10-NEXT:    v_mul_f32_e32 v1, v8, v6
-; GFX10-NEXT:    v_mul_f32_e32 v7, v6, v3
-; GFX10-NEXT:    v_fma_f32 v3, -v6, v3, v9
+; GFX10-NEXT:    v_mul_f32_e32 v1, v9, v6
+; GFX10-NEXT:    v_mul_f32_e32 v8, v6, v3
+; GFX10-NEXT:    v_fma_f32 v3, -v6, v3, v10
 ; GFX10-NEXT:    v_fmac_f32_e32 v5, v0, v6
 ; GFX10-NEXT:    v_fma_f32 v0, v2, s26, -v1
-; GFX10-NEXT:    v_fmac_f32_e32 v7, v3, v6
+; GFX10-NEXT:    v_fmac_f32_e32 v8, v3, v6
 ; GFX10-NEXT:    v_fmac_f32_e32 v1, v0, v6
 ; GFX10-NEXT:    v_mul_f32_e32 v0, v2, v6
 ; GFX10-NEXT:    s_waitcnt vmcnt(0)
-; GFX10-NEXT:    v_add_f32_e32 v4, v4, v10
+; GFX10-NEXT:    v_add_f32_e32 v4, v4, v11
 ; GFX10-NEXT:    v_mul_f32_e32 v3, v4, v6
-; GFX10-NEXT:    v_fmaak_f32 v4, s0, v5, 0x3ca3d70a
+; GFX10-NEXT:    v_fmamk_f32 v4, v5, 0x3c23d70a, v7
 ; GFX10-NEXT:    v_mul_f32_e32 v1, v3, v1
-; GFX10-NEXT:    v_mul_f32_e32 v2, v7, v4
+; GFX10-NEXT:    v_mul_f32_e32 v2, v8, v4
 ; GFX10-NEXT:    v_fmac_f32_e32 v1, v2, v0
 ; GFX10-NEXT:    v_max_f32_e32 v0, 0, v1
 ; GFX10-NEXT:    ; return to shader part epilog
@@ -71,7 +71,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GFX11-LABEL: _amdgpu_ps_main:
 ; GFX11:       ; %bb.0: ; %.entry
 ; GFX11-NEXT:    image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
-; GFX11-NEXT:    v_mov_b32_e32 v4, 0
+; GFX11-NEXT:    v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 0x3ca3d70a
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
 ; GFX11-NEXT:    s_clause 0x1
 ; GFX11-NEXT:    image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
@@ -96,43 +96,40 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
 ; GFX11-NEXT:    s_buffer_load_b128 s[20:23], s[0:3], 0x70
 ; GFX11-NEXT:    v_fma_f32 v1, v1, v5, s28
 ; GFX11-NEXT:    v_max_f32_e64 v6, s0, s0 clamp
-; GFX11-NEXT:    s_buffer_load_b128 s[24:27], s[0:3], 0x10
 ; GFX11-NEXT:    v_add_f32_e64 v5, s29, -1.0
+; GFX11-NEXT:    s_buffer_load_b128 s[24:27], s[0:3], 0x10
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_sub_f32_e32 v8, s0, v1
-; GFX11-NEXT:    v_fma_f32 v7, -s2, v6, s6
-; GFX11-NEXT:    v_fma_f32 v10, s2, v6, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT:    v_sub_f32_e32 v9, s0, v1
+; GFX11-NEXT:    v_fma_f32 v8, -s2, v6, s6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3)
 ; GFX11-NEXT:    v_fma_f32 v5, v6, v5, 1.0
-; GFX11-NEXT:    s_mov_b32 s0, 0x3c23d70a
+; GFX11-NEXT:    v_fma_f32 v11, s2, v6, v2
 ; GFX11-NEXT:    s_waitcnt lgkmcnt(0)
-; GFX11-NEXT:    v_mul_f32_e32 v9, s10, v0
+; GFX11-NEXT:    v_mul_f32_e32 v10, s10, v0
 ; GFX11-NEXT:    v_fma_f32 v0, -v0, s10, s14
-; GFX11-NEXT:    v_mul_f32_e32 v3, s22, v3
-; GFX11-NEXT:    v_dual_fmac_f32 v1, v6, v8 :: v_dual_mul_f32 v8, s18, v2
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_fmac_f32_e32 v9, v0, v6
-; GFX11-NEXT:    v_dual_fmac_f32 v10, v7, v6 :: v_dual_mul_f32 v7, v6, v3
+; GFX11-NEXT:    v_fmac_f32_e32 v1, v6, v9
+; GFX11-NEXT:    v_mul_f32_e32 v9, s18, v2
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_fmac_f32_e32 v10, v0, v6
 ; GFX11-NEXT:    v_sub_f32_e32 v0, v1, v5
-; GFX11-NEXT:    v_fma_f32 v3, -v6, v3, v9
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_fmac_f32_e32 v7, v3, v6
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_fmac_f32_e32 v5, v0, v6
-; GFX11-NEXT:    v_mul_f32_e32 v1, v8, v6
+; GFX11-NEXT:    v_mul_f32_e32 v3, s22, v3
+; GFX11-NEXT:    v_dual_fmac_f32 v11, v8, v6 :: v_dual_mul_f32 v8, v6, v3
+; GFX11-NEXT:    v_mul_f32_e32 v1, v9, v6
+; GFX11-NEXT:    v_fma_f32 v3, -v6, v3, v10
 ; GFX11-NEXT:    s_waitcnt vmcnt(0)
-; GFX11-NEXT:    v_add_f32_e32 v4, v4, v10
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT:    v_dual_mul_f32 v3, v4, v6 :: v_dual_fmaak_f32 v4, s0, v5, 0x3ca3d70a
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT:    v_add_f32_e32 v4, v4, v11
 ; GFX11-NEXT:    v_fma_f32 v0, v2, s26, -v1
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_fmac_f32_e32 v1, v0, v6
 ; GFX11-NEXT:    v_mul_f32_e32 v0, v2, v6
-; GFX11-NEXT:    v_mul_f32_e32 v2, v7, v4
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_mul_f32_e32 v1, v3, v1
+; GFX11-NEXT:    v_fmac_f32_e32 v8, v3, v6
+; GFX11-NEXT:    v_dual_mul_f32 v3, v4, v6 :: v_dual_fmamk_f32 v4, v5, 0x3c23d70a, v7
+; GFX11-NEXT:    v_dual_mul_f32 v1, v3, v1 :: v_dual_mul_f32 v2, v8, v4
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_fmac_f32_e32 v1, v2, v0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NEXT:    v_max_f32_e32 v0, 0, v1
 ; GFX11-NEXT:    ; return to shader part epilog
 .entry:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
index 226670a550014ca..d4ad53291070d78 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
@@ -33,7 +33,7 @@ define amdgpu_kernel void @mad_f16_imm_a(
 }
 
 ; GCN-LABEL: {{^}}mad_f16_imm_b:
-; GCN: v_mac_f16_e32 {{v[0-9]+}}, 0x4800, {{v[0-9]+$}}
+; GCN: v_madmk_f16 {{v[0-9]+}}, {{v[0-9]+}}, 0x4800, {{v[0-9]+$}}
 define amdgpu_kernel void @mad_f16_imm_b(
     ptr addrspace(1) %r,
     ptr addrspace(1) %a,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index f90d338ffc48736..1926d0f8f0d6fc9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -256,8 +256,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
 ; SI-NEXT:    s_mov_b32 s1, s5
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_mac_f32_e32 v1, 0x40400000, v0
-; SI-NEXT:    v_cvt_f16_f32_e32 v0, v1
+; SI-NEXT:    v_madmk_f32 v0, v0, 0x40400000, v1
+; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -280,8 +280,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
 ; VI-FLUSH-NEXT:    s_waitcnt vmcnt(0)
 ; VI-FLUSH-NEXT:    s_mov_b32 s0, s4
 ; VI-FLUSH-NEXT:    s_mov_b32 s1, s5
-; VI-FLUSH-NEXT:    v_mac_f16_e32 v1, 0x4200, v0
-; VI-FLUSH-NEXT:    buffer_store_short v1, off, s[0:3], 0
+; VI-FLUSH-NEXT:    v_madmk_f16 v0, v0, 0x4200, v1
+; VI-FLUSH-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-FLUSH-NEXT:    s_endpgm
 ;
 ; VI-DENORM-LABEL: fmuladd_f16_imm_a:
@@ -353,8 +353,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
 ; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-DENORM-NEXT:    s_mov_b32 s0, s4
 ; GFX10-DENORM-NEXT:    s_mov_b32 s1, s5
-; GFX10-DENORM-NEXT:    v_fmac_f16_e32 v1, 0x4200, v0
-; GFX10-DENORM-NEXT:    buffer_store_short v1, off, s[0:3], 0
+; GFX10-DENORM-NEXT:    v_fmamk_f16 v0, v0, 0x4200, v1
+; GFX10-DENORM-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX10-DENORM-NEXT:    s_endpgm
 ;
 ; GFX11-FLUSH-LABEL: fmuladd_f16_imm_a:
@@ -442,8 +442,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
 ; SI-NEXT:    s_mov_b32 s1, s5
 ; SI-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT:    v_mac_f32_e32 v1, 0x40400000, v0
-; SI-NEXT:    v_cvt_f16_f32_e32 v0, v1
+; SI-NEXT:    v_madmk_f32 v0, v0, 0x40400000, v1
+; SI-NEXT:    v_cvt_f16_f32_e32 v0, v0
 ; SI-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; SI-NEXT:    s_endpgm
 ;
@@ -466,8 +466,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
 ; VI-FLUSH-NEXT:    s_waitcnt vmcnt(0)
 ; VI-FLUSH-NEXT:    s_mov_b32 s0, s4
 ; VI-FLUSH-NEXT:    s_mov_b32 s1, s5
-; VI-FLUSH-NEXT:    v_mac_f16_e32 v1, 0x4200, v0
-; VI-FLUSH-NEXT:    buffer_store_short v1, off, s[0:3], 0
+; VI-FLUSH-NEXT:    v_madmk_f16 v0, v0, 0x4200, v1
+; VI-FLUSH-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; VI-FLUSH-NEXT:    s_endpgm
 ;
 ; VI-DENORM-LABEL: fmuladd_f16_imm_b:
@@ -539,8 +539,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
 ; GFX10-DENORM-NEXT:    s_waitcnt vmcnt(0)
 ; GFX10-DENORM-NEXT:    s_mov_b32 s0, s4
 ; GFX10-DENORM-NEXT:    s_mov_b32 s1, s5
-; GFX10-DENORM-NEXT:    v_fmac_f16_e32 v1, 0x4200, v0
-; GFX10-DENORM-NEXT:    buffer_store_short v1, off, s[0:3], 0
+; GFX10-DENORM-NEXT:    v_fmamk_f16 v0, v0, 0x4200, v1
+; GFX10-DENORM-NEXT:    buffer_store_short v0, off, s[0:3], 0
 ; GFX10-DENORM-NEXT:    s_endpgm
 ;
 ; GFX11-FLUSH-LABEL: fmuladd_f16_imm_b:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index f55242a8726beb1..528232a203acfe1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -203,7 +203,7 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2269,7 +2269,7 @@ define float @v_log_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -2472,7 +2472,7 @@ define float @v_log_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2675,7 +2675,7 @@ define float @v_log_fneg_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2878,7 +2878,7 @@ define float @v_log_fneg_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -3015,9 +3015,7 @@ define float @v_log_f32_fast(float %in) {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log_f32_fast:
@@ -3135,9 +3133,7 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log_f32_unsafe_math_attr:
@@ -3255,9 +3251,7 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log_f32_approx_fn_attr:
@@ -3441,7 +3435,7 @@ define float @v_log_f32_ninf(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -3577,9 +3571,7 @@ define float @v_log_f32_afn(float %in) {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log_f32_afn:
@@ -3726,9 +3718,7 @@ define float @v_log_f32_afn_dynamic(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic:
@@ -3844,11 +3834,10 @@ define float @v_fabs_log_f32_afn(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc1b17218, s0
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e64 v0, |v0|, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3f317218, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn:
@@ -3982,7 +3971,7 @@ define float @v_log_f32_daz(float %in) #0 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4174,7 +4163,7 @@ define float @v_log_f32_nnan(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -4324,7 +4313,7 @@ define float @v_log_f32_nnan_daz(float %in) #0 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4516,7 +4505,7 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -4666,7 +4655,7 @@ define float @v_log_f32_ninf_daz(float %in) #0 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4858,7 +4847,7 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -5041,9 +5030,9 @@ define float @v_log_f32_nnan_ninf(float %in) {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -5152,9 +5141,9 @@ define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
@@ -5322,9 +5311,9 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -5548,7 +5537,7 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -5722,7 +5711,7 @@ define float @v_log_f32_undef() {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -5887,7 +5876,7 @@ define float @v_log_f32_0() {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6041,7 +6030,7 @@ define float @v_log_f32_from_fpext_f16(i16 %src.i) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6212,7 +6201,7 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6390,7 +6379,7 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3377d1cf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index ab6325216c06d9f..2e5bf2e5609512b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -203,7 +203,7 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2269,7 +2269,7 @@ define float @v_log10_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -2472,7 +2472,7 @@ define float @v_log10_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2675,7 +2675,7 @@ define float @v_log10_fneg_fabs_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2878,7 +2878,7 @@ define float @v_log10_fneg_f32(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -3015,9 +3015,7 @@ define float @v_log10_f32_fast(float %in) {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log10_f32_fast:
@@ -3135,9 +3133,7 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log10_f32_unsafe_math_attr:
@@ -3255,9 +3251,7 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log10_f32_approx_fn_attr:
@@ -3441,7 +3435,7 @@ define float @v_log10_f32_ninf(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -3577,9 +3571,7 @@ define float @v_log10_f32_afn(float %in) {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log10_f32_afn:
@@ -3726,9 +3718,7 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log10_f32_afn_dynamic:
@@ -3844,11 +3834,10 @@ define float @v_fabs_log10_f32_afn(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0xc11a209b, s0
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e64 v0, |v0|, v2
-; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-SDAG-NEXT:    s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT:    v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3e9a209b, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_fabs_log10_f32_afn:
@@ -3982,7 +3971,7 @@ define float @v_log10_f32_daz(float %in) #0 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4174,7 +4163,7 @@ define float @v_log10_f32_nnan(float %in) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -4324,7 +4313,7 @@ define float @v_log10_f32_nnan_daz(float %in) #0 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4516,7 +4505,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -4666,7 +4655,7 @@ define float @v_log10_f32_ninf_daz(float %in) #0 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4858,7 +4847,7 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -5041,9 +5030,9 @@ define float @v_log10_f32_nnan_ninf(float %in) {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -5152,9 +5141,9 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf_daz:
@@ -5322,9 +5311,9 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v0, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT:    v_add_f32_e32 v0, v1, v0
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
 ; GFX1100-SDAG-NEXT:    v_sub_f32_e32 v0, v0, v1
 ; GFX1100-SDAG-NEXT:    s_setpc_b64 s[30:31]
@@ -5548,7 +5537,7 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
@@ -5722,7 +5711,7 @@ define float @v_log10_f32_undef() {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -5887,7 +5876,7 @@ define float @v_log10_f32_0() {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6041,7 +6030,7 @@ define float @v_log10_f32_from_fpext_f16(i16 %src.i) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6212,7 +6201,7 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6390,7 +6379,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) {
 ; GFX1100-SDAG-NEXT:    v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT:    v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT:    v_fmamk_f32 v2, v0, 0x3284fbcf, v2
 ; GFX1100-SDAG-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX1100-SDAG-NEXT:    v_add_f32_e32 v1, v1, v2
 ; GFX1100-SDAG-NEXT:    v_cndmask_b32_e64 v0, v0, v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll
index 51a0a50fbbff5c3..00e226291e68b6f 100644
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -11,7 +11,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
 ; GCN-LABEL: {{^}}madmk_f32:
 ; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
 ; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
+; GCN: v_madmk_f32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]]
 define amdgpu_kernel void @madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
@@ -96,7 +96,7 @@ define amdgpu_kernel void @s_s_madmk_f32(ptr addrspace(1) noalias %out, [8 x i32
 ; GCN-DAG: s_load_dword [[SREG:s[0-9]+]]
 ; GCN-DAG: buffer_load_dword [[VREG1:v[0-9]+]]
 ; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]]
-; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]]
+; GCN: v_madmk_f32 {{v[0-9]+}}, [[VREG1]], 0x41200000, [[VREG2]]
 ; GCN: s_endpgm
 define amdgpu_kernel void @v_s_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %b) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -171,8 +171,9 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(ptr addrspace(1) noalias %
 
 ; GCN-LABEL: {{^}}madmk_add_inline_imm_f32:
 ; GCN: buffer_load_dword [[A:v[0-9]+]]
-; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
-; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0
+; GCN: v_mov_b32_e32 [[B:v[0-9]+]], 2.0
+; GCN: v_madmk_f32 {{v[0-9]+}}, [[A]], 0x41200000, [[B]]
+
 define amdgpu_kernel void @madmk_add_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
   %tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
   %gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
index a399b509014dd73..b54cc27db1d12cb 100644
--- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -112,8 +112,8 @@ entry:
 ; A subregister use operand should not be tied.
 ; CHECK-LABEL: {{^}}no_fold_tied_subregister:
 ; CHECK: buffer_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
-; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
-; CHECK: buffer_store_dword v[[LO]]
+; CHECK: v_madmk_f32 v[[RES:[0-9]+]], v[[HI]], 0x41200000, v[[LO]]
+; CHECK: buffer_store_dword v[[RES]]
 define amdgpu_kernel void @no_fold_tied_subregister() #1 {
   %tmp1 = load volatile <2 x float>, ptr addrspace(1) undef
   %tmp2 = extractelement <2 x float> %tmp1, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index 705a2af73959065..4f2fd3f50494c94 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -19,14 +19,14 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GCN-NEXT:    s_sub_u32 s4, 0, s10
 ; GCN-NEXT:    s_subb_u32 s5, 0, s11
 ; GCN-NEXT:    s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
 ; GCN-NEXT:    s_ashr_i32 s12, s3, 31
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    s_add_u32 s2, s2, s12
@@ -247,12 +247,12 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v6, v2
 ; GCN-NEXT:    v_sub_i32_e32 v7, vcc, 0, v3
 ; GCN-NEXT:    v_subb_u32_e32 v8, vcc, 0, v2, vcc
-; GCN-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
+; GCN-NEXT:    v_madmk_f32 v5, v6, 0x4f800000, v5
 ; GCN-NEXT:    v_rcp_f32_e32 v5, v5
 ; GCN-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
 ; GCN-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
 ; GCN-NEXT:    v_trunc_f32_e32 v6, v6
-; GCN-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v6
+; GCN-NEXT:    v_madmk_f32 v5, v6, 0xcf800000, v5
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; GCN-NEXT:    v_mul_hi_u32 v9, v7, v5
@@ -1093,12 +1093,12 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s3
 ; GCN-NEXT:    s_sub_u32 s4, 0, s2
 ; GCN-NEXT:    s_subb_u32 s5, 0, s3
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_lo_u32 v2, s4, v1
@@ -1287,12 +1287,12 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v4, v1
 ; GCN-NEXT:    v_sub_i32_e32 v5, vcc, 0, v0
 ; GCN-NEXT:    v_subb_u32_e32 v6, vcc, 0, v1, vcc
-; GCN-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
+; GCN-NEXT:    v_madmk_f32 v3, v4, 0x4f800000, v3
 ; GCN-NEXT:    v_rcp_f32_e32 v3, v3
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
 ; GCN-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
 ; GCN-NEXT:    v_trunc_f32_e32 v4, v4
-; GCN-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v4
+; GCN-NEXT:    v_madmk_f32 v3, v4, 0xcf800000, v3
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GCN-NEXT:    v_mul_hi_u32 v7, v5, v3
@@ -1484,12 +1484,12 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v4, v1
 ; GCN-NEXT:    v_sub_i32_e32 v5, vcc, 0, v0
 ; GCN-NEXT:    v_subb_u32_e32 v6, vcc, 0, v1, vcc
-; GCN-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
+; GCN-NEXT:    v_madmk_f32 v3, v4, 0x4f800000, v3
 ; GCN-NEXT:    v_rcp_f32_e32 v3, v3
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
 ; GCN-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
 ; GCN-NEXT:    v_trunc_f32_e32 v4, v4
-; GCN-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v4
+; GCN-NEXT:    v_madmk_f32 v3, v4, 0xcf800000, v3
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GCN-NEXT:    v_mul_hi_u32 v7, v5, v3
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 613349f32e2d5ad..24319a639da4472 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -15,13 +15,13 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
 ; GCN-NEXT:    s_sub_u32 s0, 0, s12
 ; GCN-NEXT:    s_subb_u32 s1, 0, s13
 ; GCN-NEXT:    s_mov_b32 s4, s8
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    s_mov_b32 s5, s9
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_lo_u32 v2, s0, v1
@@ -226,12 +226,12 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v5, v2
 ; GCN-NEXT:    v_sub_i32_e32 v6, vcc, 0, v3
 ; GCN-NEXT:    v_subb_u32_e32 v7, vcc, 0, v2, vcc
-; GCN-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GCN-NEXT:    v_madmk_f32 v4, v5, 0x4f800000, v4
 ; GCN-NEXT:    v_rcp_f32_e32 v4, v4
 ; GCN-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GCN-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GCN-NEXT:    v_trunc_f32_e32 v5, v5
-; GCN-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GCN-NEXT:    v_madmk_f32 v4, v5, 0xcf800000, v4
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GCN-NEXT:    v_mul_hi_u32 v8, v6, v4
@@ -894,7 +894,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
 ; GCN-NEXT:    s_sub_u32 s0, 0, s12
 ; GCN-NEXT:    s_subb_u32 s1, 0, s13
 ; GCN-NEXT:    s_ashr_i32 s6, s7, 31
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    s_mov_b32 s7, s6
 ; GCN-NEXT:    s_mov_b32 s8, s4
@@ -902,7 +902,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_lo_u32 v2, s0, v1
@@ -1290,13 +1290,13 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    s_sub_u32 s2, 0, s8
 ; GCN-NEXT:    s_subb_u32 s3, 0, s9
 ; GCN-NEXT:    s_mov_b32 s4, s0
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    s_mov_b32 s5, s1
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_lo_u32 v2, s2, v1
@@ -1481,12 +1481,12 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; GCN-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
 ; GCN-NEXT:    v_subb_u32_e32 v5, vcc, 0, v1, vcc
-; GCN-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; GCN-NEXT:    v_rcp_f32_e32 v2, v2
 ; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; GCN-NEXT:    v_trunc_f32_e32 v3, v3
-; GCN-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GCN-NEXT:    v_mul_hi_u32 v6, v4, v2
@@ -1676,12 +1676,12 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; GCN-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
 ; GCN-NEXT:    v_subb_u32_e32 v5, vcc, 0, v1, vcc
-; GCN-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; GCN-NEXT:    v_rcp_f32_e32 v2, v2
 ; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; GCN-NEXT:    v_trunc_f32_e32 v3, v3
-; GCN-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GCN-NEXT:    v_mul_hi_u32 v6, v4, v2
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index cf30131b8ab58ab..012b3f976734dec 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2527,7 +2527,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; SI-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; SI-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; SI-NEXT:    v_trunc_f32_e32 v3, v3
-; SI-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; SI-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; SI-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; SI-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; SI-NEXT:    v_mul_hi_u32 v4, v2, s4
@@ -2626,7 +2626,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; VI-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; VI-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; VI-NEXT:    v_trunc_f32_e32 v3, v3
-; VI-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; VI-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; VI-NEXT:    v_cvt_u32_f32_e32 v6, v2
 ; VI-NEXT:    v_cvt_u32_f32_e32 v7, v3
 ; VI-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
@@ -2713,7 +2713,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
 ; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; GCN-NEXT:    v_trunc_f32_e32 v3, v3
-; GCN-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v6, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v7, v3
 ; GCN-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index c5ab44e31c0320d..e23f3cfad89bc88 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -14,12 +14,12 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s9
 ; GCN-NEXT:    s_sub_u32 s4, 0, s8
 ; GCN-NEXT:    s_subb_u32 s5, 0, s9
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_lo_u32 v2, s4, v1
@@ -211,12 +211,12 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; GCN-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
 ; GCN-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
-; GCN-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GCN-NEXT:    v_madmk_f32 v4, v5, 0x4f800000, v4
 ; GCN-NEXT:    v_rcp_f32_e32 v4, v4
 ; GCN-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GCN-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GCN-NEXT:    v_trunc_f32_e32 v5, v5
-; GCN-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GCN-NEXT:    v_madmk_f32 v4, v5, 0xcf800000, v4
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GCN-NEXT:    v_mul_lo_u32 v8, v6, v5
@@ -688,7 +688,7 @@ define amdgpu_kernel void @s_test_udiv24_i48(ptr addrspace(1) %out, i48 %x, i48
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
 ; GCN-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v1
 ; GCN-NEXT:    v_trunc_f32_e32 v2, v2
-; GCN-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v2
+; GCN-NEXT:    v_madmk_f32 v1, v2, 0xcf800000, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    s_mov_b32 s2, -1
@@ -886,12 +886,12 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v1, s3
 ; GCN-NEXT:    s_sub_u32 s4, 0, s2
 ; GCN-NEXT:    s_subb_u32 s5, 0, s3
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_lo_u32 v2, s4, v1
@@ -1067,12 +1067,12 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; GCN-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
 ; GCN-NEXT:    v_subb_u32_e32 v5, vcc, 0, v1, vcc
-; GCN-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; GCN-NEXT:    v_rcp_f32_e32 v2, v2
 ; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; GCN-NEXT:    v_trunc_f32_e32 v3, v3
-; GCN-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GCN-NEXT:    v_mul_lo_u32 v6, v4, v3
@@ -1335,7 +1335,7 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    s_mov_b32 s2, -1
@@ -1509,7 +1509,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) {
 ; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; GCN-NEXT:    v_trunc_f32_e32 v3, v3
-; GCN-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GCN-NEXT:    v_mul_hi_u32 v4, v2, s4
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 894c96acbbcd6b1..f68d14a32b929a5 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -15,13 +15,13 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
 ; GCN-NEXT:    s_sub_u32 s0, 0, s12
 ; GCN-NEXT:    s_subb_u32 s1, 0, s13
 ; GCN-NEXT:    s_mov_b32 s4, s8
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    s_mov_b32 s5, s9
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_lo_u32 v2, s0, v1
@@ -221,12 +221,12 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; GCN-NEXT:    v_sub_i32_e32 v6, vcc, 0, v2
 ; GCN-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
-; GCN-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; GCN-NEXT:    v_madmk_f32 v4, v5, 0x4f800000, v4
 ; GCN-NEXT:    v_rcp_f32_e32 v4, v4
 ; GCN-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; GCN-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; GCN-NEXT:    v_trunc_f32_e32 v5, v5
-; GCN-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v5
+; GCN-NEXT:    v_madmk_f32 v4, v5, 0xcf800000, v4
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v5, v5
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; GCN-NEXT:    v_mul_lo_u32 v8, v6, v5
@@ -716,13 +716,13 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    s_sub_u32 s0, 0, s6
 ; GCN-NEXT:    s_subb_u32 s1, 0, s7
 ; GCN-NEXT:    s_mov_b32 s8, s4
-; GCN-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    s_mov_b32 s9, s5
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_mul_lo_u32 v2, s0, v1
@@ -903,7 +903,7 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x)
 ; GCN-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GCN-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GCN-NEXT:    v_trunc_f32_e32 v1, v1
-; GCN-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GCN-NEXT:    s_waitcnt lgkmcnt(0)
@@ -1086,12 +1086,12 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
 ; GCN-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; GCN-NEXT:    v_sub_i32_e32 v4, vcc, 0, v0
 ; GCN-NEXT:    v_subb_u32_e32 v5, vcc, 0, v1, vcc
-; GCN-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; GCN-NEXT:    v_rcp_f32_e32 v2, v2
 ; GCN-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; GCN-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; GCN-NEXT:    v_trunc_f32_e32 v3, v3
-; GCN-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT:    v_madmk_f32 v2, v3, 0xcf800000, v2
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v3, v3
 ; GCN-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; GCN-NEXT:    v_mul_lo_u32 v6, v4, v3
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index 3df7f3c26aad82e..2b5762e1fa2a52f 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -253,7 +253,7 @@ bb:
 
 ; SI: v_add_f32_e32 [[TMP2:v[0-9]+]], [[CVT_A]], [[CVT_A]]
 ; SI: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
-; SI: v_mac_f32_e32 v{{[0-9]+}}, 0x41000000, v{{[0-9]+}}
+; SI: v_madmk_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0x41000000, v{{[0-9]+}}
 
 ; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
 ; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 94b822ac4887577..cadc23414dcac11 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -743,12 +743,12 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
 ; GFX1032-NEXT:    v_cvt_f32_u32_e32 v1, s5
 ; GFX1032-NEXT:    s_sub_u32 s9, 0, s4
 ; GFX1032-NEXT:    s_subb_u32 s10, 0, s5
-; GFX1032-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX1032-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GFX1032-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX1032-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX1032-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX1032-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX1032-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX1032-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX1032-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX1032-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX1032-NEXT:    v_readfirstlane_b32 s0, v1
@@ -905,12 +905,12 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
 ; GFX1064-NEXT:    v_cvt_f32_u32_e32 v1, s5
 ; GFX1064-NEXT:    s_sub_u32 s9, 0, s4
 ; GFX1064-NEXT:    s_subb_u32 s10, 0, s5
-; GFX1064-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX1064-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; GFX1064-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX1064-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; GFX1064-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; GFX1064-NEXT:    v_trunc_f32_e32 v1, v1
-; GFX1064-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX1064-NEXT:    v_madmk_f32 v0, v1, 0xcf800000, v0
 ; GFX1064-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; GFX1064-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; GFX1064-NEXT:    v_readfirstlane_b32 s8, v1



More information about the llvm-commits mailing list