[llvm] [AMDGPU][CodeGen] Fold immediates in src1 operands of V_MAD/MAC/FMA/FMAC. (PR #68002)
Ivan Kosarev via llvm-commits
llvm-commits at lists.llvm.org
Mon Oct 2 09:19:23 PDT 2023
https://github.com/kosarev created https://github.com/llvm/llvm-project/pull/68002
(No description provided.)
>From deeb33d066452e574fbf521b235405b33b3870bd Mon Sep 17 00:00:00 2001
From: Ivan Kosarev <ivan.kosarev at amd.com>
Date: Mon, 2 Oct 2023 17:14:23 +0100
Subject: [PATCH] [AMDGPU][CodeGen] Fold immediates in src1 operands of
V_MAD/MAC/FMA/FMAC.
---
llvm/lib/Target/AMDGPU/SIInstrInfo.cpp | 25 +-
.../AMDGPU/amdgpu-codegenprepare-idiv.ll | 52 ++--
llvm/test/CodeGen/AMDGPU/bypass-div.ll | 226 +++++++++---------
.../test/CodeGen/AMDGPU/carryout-selection.ll | 28 +--
.../CodeGen/AMDGPU/dagcombine-fma-fmad.ll | 79 +++---
.../AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll | 2 +-
llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll | 24 +-
llvm/test/CodeGen/AMDGPU/llvm.log.ll | 73 +++---
llvm/test/CodeGen/AMDGPU/llvm.log10.ll | 73 +++---
llvm/test/CodeGen/AMDGPU/madmk.ll | 9 +-
llvm/test/CodeGen/AMDGPU/operand-folding.ll | 4 +-
llvm/test/CodeGen/AMDGPU/sdiv64.ll | 20 +-
llvm/test/CodeGen/AMDGPU/srem64.ll | 24 +-
llvm/test/CodeGen/AMDGPU/udiv.ll | 6 +-
llvm/test/CodeGen/AMDGPU/udiv64.ll | 22 +-
llvm/test/CodeGen/AMDGPU/urem64.ll | 18 +-
llvm/test/CodeGen/AMDGPU/v_mac.ll | 2 +-
llvm/test/CodeGen/AMDGPU/wave32.ll | 8 +-
18 files changed, 339 insertions(+), 356 deletions(-)
diff --git a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
index 2799a3e78b04d22..92b5aa5f44db915 100644
--- a/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@@ -3250,9 +3250,12 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
MachineOperand *Src2 = getNamedOperand(UseMI, AMDGPU::OpName::src2);
// Multiplied part is the constant: Use v_madmk_{f16, f32}.
- // We should only expect these to be on src0 due to canonicalization.
- if (Src0->isReg() && Src0->getReg() == Reg) {
- if (!Src1->isReg() || RI.isSGPRClass(MRI->getRegClass(Src1->getReg())))
+ if ((Src0->isReg() && Src0->getReg() == Reg) ||
+ (Src1->isReg() && Src1->getReg() == Reg)) {
+ MachineOperand *RegSrc =
+ Src1->isReg() && Src1->getReg() == Reg ? Src0 : Src1;
+ if (!RegSrc->isReg() ||
+ RI.isSGPRClass(MRI->getRegClass(RegSrc->getReg())))
return false;
if (!Src2->isReg() || RI.isSGPRClass(MRI->getRegClass(Src2->getReg())))
@@ -3266,18 +3269,22 @@ bool SIInstrInfo::FoldImmediate(MachineInstr &UseMI, MachineInstr &DefMI,
if (pseudoToMCOpcode(NewOpc) == -1)
return false;
- // We need to swap operands 0 and 1 since madmk constant is at operand 1.
+ // V_FMAMK_F16_t16 takes VGPR_32_Lo128 operands, so the rewrite
+ // would also require restricting their register classes. For now
+ // just bail out.
+ if (NewOpc == AMDGPU::V_FMAMK_F16_t16)
+ return false;
const int64_t Imm = ImmOp->getImm();
// FIXME: This would be a lot easier if we could return a new instruction
// instead of having to modify in place.
- Register Src1Reg = Src1->getReg();
- unsigned Src1SubReg = Src1->getSubReg();
- Src0->setReg(Src1Reg);
- Src0->setSubReg(Src1SubReg);
- Src0->setIsKill(Src1->isKill());
+ Register SrcReg = RegSrc->getReg();
+ unsigned SrcSubReg = RegSrc->getSubReg();
+ Src0->setReg(SrcReg);
+ Src0->setSubReg(SrcSubReg);
+ Src0->setIsKill(RegSrc->isKill());
if (Opc == AMDGPU::V_MAC_F32_e64 || Opc == AMDGPU::V_MAC_F16_e64 ||
Opc == AMDGPU::V_FMAC_F32_e64 || Opc == AMDGPU::V_FMAC_F16_t16_e64 ||
diff --git a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
index 1b8216f4aa2a61f..c793f9ee682f8c4 100644
--- a/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/amdgpu-codegenprepare-idiv.ll
@@ -7149,7 +7149,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
-; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX6-NEXT: s_movk_i32 s8, 0x11f
@@ -7269,7 +7269,7 @@ define amdgpu_kernel void @udiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX9-NEXT: v_trunc_f32_e32 v1, v1
-; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
@@ -7533,21 +7533,21 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, <
; GFX6-NEXT: v_madak_f32 v0, 0, v0, 0x457ff000
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
; GFX6-NEXT: s_movk_i32 s6, 0xf001
-; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
-; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT: s_movk_i32 s8, 0xfff
+; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
-; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
-; GFX6-NEXT: s_waitcnt lgkmcnt(0)
-; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 12
-; GFX6-NEXT: s_mov_b32 s7, 0xf000
+; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
+; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
; GFX6-NEXT: v_mul_hi_u32 v2, v0, s6
; GFX6-NEXT: v_mul_lo_u32 v4, v1, s6
; GFX6-NEXT: v_mul_lo_u32 v3, v0, s6
+; GFX6-NEXT: s_waitcnt lgkmcnt(0)
+; GFX6-NEXT: s_lshr_b64 s[0:1], s[0:1], 12
; GFX6-NEXT: v_sub_i32_e32 v2, vcc, v2, v0
; GFX6-NEXT: v_add_i32_e32 v2, vcc, v2, v4
; GFX6-NEXT: v_mul_hi_u32 v5, v0, v3
@@ -7647,7 +7647,7 @@ define amdgpu_kernel void @udiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out, <
; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX9-NEXT: v_trunc_f32_e32 v1, v1
-; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -7834,7 +7834,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
-; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
@@ -7954,7 +7954,7 @@ define amdgpu_kernel void @urem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX9-NEXT: v_trunc_f32_e32 v1, v1
-; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_readfirstlane_b32 s0, v0
@@ -8283,7 +8283,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
-; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
@@ -8399,7 +8399,7 @@ define amdgpu_kernel void @sdiv_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX9-NEXT: v_trunc_f32_e32 v1, v1
-; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_readfirstlane_b32 s4, v1
@@ -8589,14 +8589,14 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_sub_u32 s4, 0, s10
; GFX6-NEXT: s_subb_u32 s5, 0, s11
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_ashr_i32 s12, s3, 31
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
-; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: s_add_u32 s2, s2, s12
@@ -8724,13 +8724,13 @@ define amdgpu_kernel void @sdiv_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_sub_u32 s0, 0, s8
; GFX9-NEXT: s_subb_u32 s1, 0, s9
-; GFX9-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX9-NEXT: v_rcp_f32_e32 v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1
; GFX9-NEXT: v_trunc_f32_e32 v2, v2
-; GFX9-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2
+; GFX9-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1
; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_readfirstlane_b32 s10, v2
@@ -8944,14 +8944,14 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
; GFX6-NEXT: v_mov_b32_e32 v1, 0x4f800000
; GFX6-NEXT: v_mac_f32_e32 v0, 0, v1
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
-; GFX6-NEXT: s_movk_i32 s6, 0xf001
; GFX6-NEXT: s_load_dwordx2 s[4:5], s[0:1], 0x9
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0xd
+; GFX6-NEXT: s_movk_i32 s6, 0xf001
; GFX6-NEXT: s_mov_b32 s7, 0xf000
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
-; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
@@ -9073,7 +9073,7 @@ define amdgpu_kernel void @ssdiv_v2i64_mixed_pow2k_denom(ptr addrspace(1) %out,
; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1
; GFX9-NEXT: v_trunc_f32_e32 v2, v2
-; GFX9-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2
+; GFX9-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX9-NEXT: s_waitcnt lgkmcnt(0)
@@ -9789,7 +9789,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
-; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: s_mov_b32 s2, -1
@@ -9903,7 +9903,7 @@ define amdgpu_kernel void @srem_i64_oddk_denom(ptr addrspace(1) %out, i64 %x) {
; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX9-NEXT: v_trunc_f32_e32 v1, v1
-; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_readfirstlane_b32 s4, v1
@@ -10093,14 +10093,14 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX6-NEXT: s_sub_u32 s4, 0, s8
; GFX6-NEXT: s_subb_u32 s5, 0, s9
; GFX6-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GFX6-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX6-NEXT: v_rcp_f32_e32 v0, v0
; GFX6-NEXT: s_waitcnt lgkmcnt(0)
; GFX6-NEXT: s_ashr_i32 s10, s3, 31
; GFX6-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX6-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX6-NEXT: v_trunc_f32_e32 v1, v1
-; GFX6-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX6-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX6-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX6-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX6-NEXT: s_add_u32 s2, s2, s10
@@ -10226,13 +10226,13 @@ define amdgpu_kernel void @srem_i64_pow2_shl_denom(ptr addrspace(1) %out, i64 %x
; GFX9-NEXT: s_load_dwordx4 s[4:7], s[0:1], 0x24
; GFX9-NEXT: s_sub_u32 s0, 0, s8
; GFX9-NEXT: s_subb_u32 s1, 0, s9
-; GFX9-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX9-NEXT: v_rcp_f32_e32 v1, v0
; GFX9-NEXT: v_mov_b32_e32 v0, 0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GFX9-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1
; GFX9-NEXT: v_trunc_f32_e32 v2, v2
-; GFX9-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2
+; GFX9-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1
; GFX9-NEXT: v_cvt_u32_f32_e32 v2, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_readfirstlane_b32 s2, v2
diff --git a/llvm/test/CodeGen/AMDGPU/bypass-div.ll b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
index 2184478635e0e38..cb1b664549c9a30 100644
--- a/llvm/test/CodeGen/AMDGPU/bypass-div.ll
+++ b/llvm/test/CodeGen/AMDGPU/bypass-div.ll
@@ -25,12 +25,12 @@ define i64 @sdiv64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10
; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11
; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
-; GFX9-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
+; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GFX9-NEXT: v_rcp_f32_e32 v2, v2
; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT: v_trunc_f32_e32 v3, v3
-; GFX9-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3
; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6
@@ -171,12 +171,12 @@ define i64 @udiv64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
-; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GFX9-NEXT: v_rcp_f32_e32 v4, v4
; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT: v_trunc_f32_e32 v5, v5
-; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
+; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8
@@ -312,12 +312,12 @@ define i64 @srem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v9
; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v10
; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v9, vcc
-; GFX9-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
+; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GFX9-NEXT: v_rcp_f32_e32 v2, v2
; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GFX9-NEXT: v_trunc_f32_e32 v3, v3
-; GFX9-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2
; GFX9-NEXT: v_cvt_u32_f32_e32 v11, v3
; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6
@@ -454,12 +454,12 @@ define i64 @urem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
-; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GFX9-NEXT: v_rcp_f32_e32 v4, v4
; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT: v_trunc_f32_e32 v5, v5
-; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
+; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8
@@ -709,118 +709,118 @@ define <2 x i64> @sdivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: s_xor_b64 s[10:11], exec, s[4:5]
; GFX9-NEXT: s_cbranch_execz .LBB8_2
; GFX9-NEXT: ; %bb.1:
-; GFX9-NEXT: v_ashrrev_i32_e32 v11, 31, v3
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v2, v11
-; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v3, v11, vcc
-; GFX9-NEXT: v_xor_b32_e32 v2, v2, v11
-; GFX9-NEXT: v_xor_b32_e32 v3, v4, v11
-; GFX9-NEXT: v_cvt_f32_u32_e32 v4, v3
-; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v2
-; GFX9-NEXT: v_sub_co_u32_e32 v9, vcc, 0, v3
-; GFX9-NEXT: v_subb_co_u32_e32 v10, vcc, 0, v2, vcc
-; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
-; GFX9-NEXT: v_rcp_f32_e32 v4, v4
-; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
-; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
-; GFX9-NEXT: v_trunc_f32_e32 v5, v5
-; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
-; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v4
-; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v5
-; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8
-; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v9, v8, 0
-; GFX9-NEXT: v_mul_lo_u32 v7, v9, v12
-; GFX9-NEXT: v_mul_hi_u32 v13, v8, v4
-; GFX9-NEXT: v_add3_u32 v7, v5, v7, v6
-; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v8, v7, 0
-; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v5
-; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v4, 0
-; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v6, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[6:7], s[4:5], v12, v7, 0
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v14, v5, vcc
-; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v7, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v6
-; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v5, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v8, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v5, vcc
-; GFX9-NEXT: v_mul_lo_u32 v6, v9, v12
-; GFX9-NEXT: v_mul_lo_u32 v7, v10, v13
-; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v9, v13, 0
-; GFX9-NEXT: v_add3_u32 v7, v5, v6, v7
-; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v12, v7, 0
-; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v13, v7, 0
-; GFX9-NEXT: v_mul_hi_u32 v14, v13, v4
-; GFX9-NEXT: v_mad_u64_u32 v[9:10], s[4:5], v12, v4, 0
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v14, v7
-; GFX9-NEXT: v_addc_co_u32_e32 v7, vcc, 0, v8, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v9
-; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v7, v10, vcc
-; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, 0, v6, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v4, v5
+; GFX9-NEXT: v_ashrrev_i32_e32 v9, 31, v3
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v9
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v9, vcc
+; GFX9-NEXT: v_xor_b32_e32 v10, v3, v9
+; GFX9-NEXT: v_xor_b32_e32 v11, v2, v9
+; GFX9-NEXT: v_cvt_f32_u32_e32 v2, v11
+; GFX9-NEXT: v_cvt_f32_u32_e32 v3, v10
+; GFX9-NEXT: v_sub_co_u32_e32 v7, vcc, 0, v11
+; GFX9-NEXT: v_subb_co_u32_e32 v8, vcc, 0, v10, vcc
+; GFX9-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
+; GFX9-NEXT: v_rcp_f32_e32 v2, v2
+; GFX9-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
+; GFX9-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
+; GFX9-NEXT: v_trunc_f32_e32 v3, v3
+; GFX9-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
+; GFX9-NEXT: v_cvt_u32_f32_e32 v6, v2
+; GFX9-NEXT: v_cvt_u32_f32_e32 v12, v3
+; GFX9-NEXT: v_mul_lo_u32 v4, v8, v6
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v6, 0
+; GFX9-NEXT: v_mul_lo_u32 v5, v7, v12
+; GFX9-NEXT: v_mul_hi_u32 v13, v6, v2
+; GFX9-NEXT: v_add3_u32 v5, v3, v5, v4
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v6, v5, 0
+; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v13, v3
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v12, v2, 0
+; GFX9-NEXT: v_addc_co_u32_e32 v14, vcc, 0, v4, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v12, v5, 0
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v14, v3, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v5, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v13, vcc, v6, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v12, vcc, v12, v3, vcc
+; GFX9-NEXT: v_mul_lo_u32 v4, v7, v12
+; GFX9-NEXT: v_mul_lo_u32 v5, v8, v13
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v7, v13, 0
+; GFX9-NEXT: v_add3_u32 v5, v3, v4, v5
+; GFX9-NEXT: v_mad_u64_u32 v[3:4], s[4:5], v12, v5, 0
+; GFX9-NEXT: v_mad_u64_u32 v[5:6], s[4:5], v13, v5, 0
+; GFX9-NEXT: v_mul_hi_u32 v14, v13, v2
+; GFX9-NEXT: v_mad_u64_u32 v[7:8], s[4:5], v12, v2, 0
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v14, v5
; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v6, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v13, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, v12, v5, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v7
+; GFX9-NEXT: v_addc_co_u32_e32 v2, vcc, v5, v8, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, 0, v4, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v3
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v4, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v13, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v12, v3, vcc
; GFX9-NEXT: v_ashrrev_i32_e32 v7, 31, v1
; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v0, v7
-; GFX9-NEXT: v_xor_b32_e32 v8, v0, v7
-; GFX9-NEXT: v_addc_co_u32_e32 v6, vcc, v1, v7, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v8, v5, 0
-; GFX9-NEXT: v_mul_hi_u32 v9, v8, v4
-; GFX9-NEXT: v_xor_b32_e32 v6, v6, v7
-; GFX9-NEXT: v_add_co_u32_e32 v9, vcc, v9, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v10, vcc, 0, v1, vcc
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v6, v4, 0
-; GFX9-NEXT: v_mad_u64_u32 v[4:5], s[4:5], v6, v5, 0
-; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v9, v0
-; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v10, v1, vcc
-; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v5, vcc
-; GFX9-NEXT: v_add_co_u32_e32 v4, vcc, v0, v4
-; GFX9-NEXT: v_addc_co_u32_e32 v5, vcc, 0, v1, vcc
-; GFX9-NEXT: v_mul_lo_u32 v9, v2, v4
-; GFX9-NEXT: v_mul_lo_u32 v10, v3, v5
-; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v3, v4, 0
-; GFX9-NEXT: v_add3_u32 v1, v1, v10, v9
-; GFX9-NEXT: v_sub_u32_e32 v9, v6, v1
-; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v8, v0
-; GFX9-NEXT: v_subb_co_u32_e64 v8, s[4:5], v9, v2, vcc
-; GFX9-NEXT: v_sub_co_u32_e64 v9, s[4:5], v0, v3
-; GFX9-NEXT: v_subbrev_co_u32_e64 v10, s[6:7], 0, v8, s[4:5]
-; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v10, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v12, 0, -1, s[6:7]
-; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v9, v3
+; GFX9-NEXT: v_xor_b32_e32 v5, v0, v7
+; GFX9-NEXT: v_addc_co_u32_e32 v4, vcc, v1, v7, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v5, v3, 0
+; GFX9-NEXT: v_mul_hi_u32 v6, v5, v2
+; GFX9-NEXT: v_xor_b32_e32 v4, v4, v7
+; GFX9-NEXT: v_add_co_u32_e32 v6, vcc, v6, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v8, vcc, 0, v1, vcc
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v4, v2, 0
+; GFX9-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v4, v3, 0
+; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, v6, v0
+; GFX9-NEXT: v_addc_co_u32_e32 v0, vcc, v8, v1, vcc
+; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v0, v2
+; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v1, vcc
+; GFX9-NEXT: v_mul_lo_u32 v6, v10, v2
+; GFX9-NEXT: v_mul_lo_u32 v8, v11, v3
+; GFX9-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v11, v2, 0
+; GFX9-NEXT: v_add3_u32 v1, v1, v8, v6
+; GFX9-NEXT: v_sub_u32_e32 v6, v4, v1
+; GFX9-NEXT: v_sub_co_u32_e32 v0, vcc, v5, v0
+; GFX9-NEXT: v_subb_co_u32_e64 v6, s[4:5], v6, v10, vcc
+; GFX9-NEXT: v_sub_co_u32_e64 v8, s[4:5], v0, v11
+; GFX9-NEXT: v_subbrev_co_u32_e64 v12, s[6:7], 0, v6, s[4:5]
+; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v12, v10
+; GFX9-NEXT: v_cndmask_b32_e64 v5, 0, -1, s[6:7]
+; GFX9-NEXT: v_cmp_ge_u32_e64 s[6:7], v8, v11
; GFX9-NEXT: v_cndmask_b32_e64 v13, 0, -1, s[6:7]
-; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v10, v2
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v12, v13, s[6:7]
-; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 2, v4
-; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v5, s[6:7]
-; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v4
-; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v6, v1, vcc
-; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v5, s[6:7]
-; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v12
-; GFX9-NEXT: v_cndmask_b32_e64 v6, 0, -1, vcc
-; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v3
-; GFX9-NEXT: v_cndmask_b32_e64 v12, v16, v14, s[6:7]
+; GFX9-NEXT: v_cmp_eq_u32_e64 s[6:7], v12, v10
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v5, v13, s[6:7]
+; GFX9-NEXT: v_add_co_u32_e64 v13, s[6:7], 2, v2
+; GFX9-NEXT: v_addc_co_u32_e64 v14, s[6:7], 0, v3, s[6:7]
+; GFX9-NEXT: v_add_co_u32_e64 v15, s[6:7], 1, v2
+; GFX9-NEXT: v_subb_co_u32_e32 v1, vcc, v4, v1, vcc
+; GFX9-NEXT: v_addc_co_u32_e64 v16, s[6:7], 0, v3, s[6:7]
+; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v1, v10
+; GFX9-NEXT: v_cmp_ne_u32_e64 s[6:7], 0, v5
+; GFX9-NEXT: v_cndmask_b32_e64 v4, 0, -1, vcc
+; GFX9-NEXT: v_cmp_ge_u32_e32 vcc, v0, v11
+; GFX9-NEXT: v_cndmask_b32_e64 v5, v16, v14, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e64 v14, 0, -1, vcc
-; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v2
-; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v8, v2, s[4:5]
-; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v9, v3
-; GFX9-NEXT: v_cndmask_b32_e32 v6, v6, v14, vcc
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, v1, v10
+; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v14, vcc
+; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
+; GFX9-NEXT: v_cndmask_b32_e64 v4, v15, v13, s[6:7]
+; GFX9-NEXT: v_cndmask_b32_e32 v3, v3, v5, vcc
+; GFX9-NEXT: v_cndmask_b32_e32 v2, v2, v4, vcc
+; GFX9-NEXT: v_xor_b32_e32 v5, v7, v9
+; GFX9-NEXT: v_xor_b32_e32 v2, v2, v5
+; GFX9-NEXT: v_xor_b32_e32 v3, v3, v5
+; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v2, v5
+; GFX9-NEXT: v_subb_co_u32_e64 v2, s[4:5], v6, v10, s[4:5]
+; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v3, v5, s[8:9]
+; GFX9-NEXT: v_sub_co_u32_e64 v3, s[4:5], v8, v11
; GFX9-NEXT: v_subbrev_co_u32_e64 v2, s[4:5], 0, v2, s[4:5]
-; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v6
-; GFX9-NEXT: v_cndmask_b32_e64 v6, v15, v13, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v10, v2, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v4, v4, v6, vcc
-; GFX9-NEXT: v_xor_b32_e32 v6, v7, v11
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v12, v2, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e32 v1, v1, v2, vcc
-; GFX9-NEXT: v_cndmask_b32_e64 v2, v9, v3, s[6:7]
-; GFX9-NEXT: v_cndmask_b32_e32 v5, v5, v12, vcc
-; GFX9-NEXT: v_xor_b32_e32 v4, v4, v6
+; GFX9-NEXT: v_cndmask_b32_e64 v2, v8, v3, s[6:7]
; GFX9-NEXT: v_cndmask_b32_e32 v0, v0, v2, vcc
-; GFX9-NEXT: v_xor_b32_e32 v5, v5, v6
-; GFX9-NEXT: v_sub_co_u32_e64 v4, s[8:9], v4, v6
; GFX9-NEXT: v_xor_b32_e32 v0, v0, v7
-; GFX9-NEXT: v_subb_co_u32_e64 v5, s[8:9], v5, v6, s[8:9]
; GFX9-NEXT: v_xor_b32_e32 v1, v1, v7
; GFX9-NEXT: v_sub_co_u32_e32 v6, vcc, v0, v7
; GFX9-NEXT: v_subb_co_u32_e32 v7, vcc, v1, v7, vcc
@@ -884,12 +884,12 @@ define <2 x i64> @udivrem64(i64 %a, i64 %b) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v5, v3
; GFX9-NEXT: v_sub_co_u32_e32 v10, vcc, 0, v2
; GFX9-NEXT: v_subb_co_u32_e32 v11, vcc, 0, v3, vcc
-; GFX9-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; GFX9-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GFX9-NEXT: v_rcp_f32_e32 v4, v4
; GFX9-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GFX9-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GFX9-NEXT: v_trunc_f32_e32 v5, v5
-; GFX9-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
+; GFX9-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GFX9-NEXT: v_cvt_u32_f32_e32 v8, v5
; GFX9-NEXT: v_cvt_u32_f32_e32 v9, v4
; GFX9-NEXT: v_mul_lo_u32 v6, v10, v8
diff --git a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
index a9a6075516f6c3e..50693a92bc92cb9 100644
--- a/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
+++ b/llvm/test/CodeGen/AMDGPU/carryout-selection.ll
@@ -1804,12 +1804,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; CISI-NEXT: v_cvt_f32_u32_e32 v1, s3
; CISI-NEXT: s_sub_u32 s0, 0, s2
; CISI-NEXT: s_subb_u32 s1, 0, s3
-; CISI-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; CISI-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; CISI-NEXT: v_rcp_f32_e32 v0, v0
; CISI-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; CISI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; CISI-NEXT: v_trunc_f32_e32 v1, v1
-; CISI-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; CISI-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; CISI-NEXT: v_cvt_u32_f32_e32 v1, v1
; CISI-NEXT: v_cvt_u32_f32_e32 v0, v0
; CISI-NEXT: v_mul_lo_u32 v2, s0, v1
@@ -1954,12 +1954,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; VI-NEXT: v_cvt_f32_u32_e32 v1, s3
; VI-NEXT: s_sub_u32 s8, 0, s2
; VI-NEXT: s_subb_u32 s9, 0, s3
-; VI-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; VI-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; VI-NEXT: v_rcp_f32_e32 v0, v0
; VI-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; VI-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; VI-NEXT: v_trunc_f32_e32 v1, v1
-; VI-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; VI-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; VI-NEXT: v_cvt_u32_f32_e32 v4, v1
; VI-NEXT: v_cvt_u32_f32_e32 v5, v0
; VI-NEXT: v_mul_lo_u32 v2, s8, v4
@@ -2111,12 +2111,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX9-NEXT: v_cvt_f32_u32_e32 v1, s3
; GFX9-NEXT: s_sub_u32 s0, 0, s2
; GFX9-NEXT: s_subb_u32 s1, 0, s3
-; GFX9-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX9-NEXT: v_rcp_f32_e32 v0, v0
; GFX9-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX9-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX9-NEXT: v_trunc_f32_e32 v1, v1
-; GFX9-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX9-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX9-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX9-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX9-NEXT: v_readfirstlane_b32 s10, v1
@@ -2279,12 +2279,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1010-NEXT: v_cvt_f32_u32_e32 v1, s3
; GFX1010-NEXT: s_sub_u32 s9, 0, s2
; GFX1010-NEXT: s_subb_u32 s10, 0, s3
-; GFX1010-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX1010-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX1010-NEXT: v_rcp_f32_e32 v0, v0
; GFX1010-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1010-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1010-NEXT: v_trunc_f32_e32 v1, v1
-; GFX1010-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX1010-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX1010-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1010-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1010-NEXT: v_readfirstlane_b32 s0, v1
@@ -2441,12 +2441,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1030W32-NEXT: v_cvt_f32_u32_e32 v1, s3
; GFX1030W32-NEXT: s_sub_u32 s9, 0, s2
; GFX1030W32-NEXT: s_subb_u32 s10, 0, s3
-; GFX1030W32-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1
+; GFX1030W32-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0
; GFX1030W32-NEXT: v_rcp_f32_e32 v0, v0
; GFX1030W32-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1030W32-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1030W32-NEXT: v_trunc_f32_e32 v1, v1
-; GFX1030W32-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1
+; GFX1030W32-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0
; GFX1030W32-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1030W32-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1030W32-NEXT: v_readfirstlane_b32 s0, v1
@@ -2603,12 +2603,12 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX1030W64-NEXT: v_cvt_f32_u32_e32 v1, s3
; GFX1030W64-NEXT: s_sub_u32 s9, 0, s2
; GFX1030W64-NEXT: s_subb_u32 s10, 0, s3
-; GFX1030W64-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1
+; GFX1030W64-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0
; GFX1030W64-NEXT: v_rcp_f32_e32 v0, v0
; GFX1030W64-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1030W64-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1030W64-NEXT: v_trunc_f32_e32 v1, v1
-; GFX1030W64-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1
+; GFX1030W64-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0
; GFX1030W64-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1030W64-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1030W64-NEXT: v_readfirstlane_b32 s8, v1
@@ -2766,7 +2766,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX11-NEXT: s_sub_u32 s9, 0, s2
; GFX11-NEXT: s_subb_u32 s10, 0, s3
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_fmac_f32_e32 v0, 0x4f800000, v1
+; GFX11-NEXT: v_fmamk_f32 v0, v1, 0x4f800000, v0
; GFX11-NEXT: v_rcp_f32_e32 v0, v0
; GFX11-NEXT: s_waitcnt_depctr 0xfff
; GFX11-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
@@ -2774,7 +2774,7 @@ define amdgpu_kernel void @sudiv64(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GFX11-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX11-NEXT: v_trunc_f32_e32 v1, v1
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_2)
-; GFX11-NEXT: v_fmac_f32_e32 v0, 0xcf800000, v1
+; GFX11-NEXT: v_fmamk_f32 v0, v1, 0xcf800000, v0
; GFX11-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX11-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
diff --git a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
index fe649d433304178..9fb0cab068d2862 100644
--- a/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
+++ b/llvm/test/CodeGen/AMDGPU/dagcombine-fma-fmad.ll
@@ -7,6 +7,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX10: ; %bb.0: ; %.entry
; GFX10-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
; GFX10-NEXT: v_mov_b32_e32 v4, 0
+; GFX10-NEXT: v_mov_b32_e32 v7, 0x3ca3d70a
; GFX10-NEXT: s_waitcnt vmcnt(0)
; GFX10-NEXT: s_clause 0x1
; GFX10-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
@@ -36,34 +37,33 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX10-NEXT: v_fma_f32 v1, v1, v5, s28
; GFX10-NEXT: v_max_f32_e64 v6, s0, s0 clamp
; GFX10-NEXT: v_add_f32_e64 v5, s29, -1.0
-; GFX10-NEXT: v_sub_f32_e32 v8, s0, v1
-; GFX10-NEXT: v_fma_f32 v7, -s2, v6, s6
+; GFX10-NEXT: v_sub_f32_e32 v9, s0, v1
+; GFX10-NEXT: v_fma_f32 v8, -s2, v6, s6
; GFX10-NEXT: v_fma_f32 v5, v6, v5, 1.0
-; GFX10-NEXT: v_mad_f32 v10, s2, v6, v2
-; GFX10-NEXT: s_mov_b32 s0, 0x3c23d70a
-; GFX10-NEXT: v_fmac_f32_e32 v1, v6, v8
-; GFX10-NEXT: v_fmac_f32_e32 v10, v7, v6
+; GFX10-NEXT: v_mad_f32 v11, s2, v6, v2
+; GFX10-NEXT: v_fmac_f32_e32 v1, v6, v9
+; GFX10-NEXT: v_fmac_f32_e32 v11, v8, v6
; GFX10-NEXT: s_waitcnt lgkmcnt(0)
-; GFX10-NEXT: v_mul_f32_e32 v9, s10, v0
+; GFX10-NEXT: v_mul_f32_e32 v10, s10, v0
; GFX10-NEXT: v_fma_f32 v0, -v0, s10, s14
-; GFX10-NEXT: v_mul_f32_e32 v8, s18, v2
+; GFX10-NEXT: v_mul_f32_e32 v9, s18, v2
; GFX10-NEXT: v_mul_f32_e32 v3, s22, v3
-; GFX10-NEXT: v_fmac_f32_e32 v9, v0, v6
+; GFX10-NEXT: v_fmac_f32_e32 v10, v0, v6
; GFX10-NEXT: v_sub_f32_e32 v0, v1, v5
-; GFX10-NEXT: v_mul_f32_e32 v1, v8, v6
-; GFX10-NEXT: v_mul_f32_e32 v7, v6, v3
-; GFX10-NEXT: v_fma_f32 v3, -v6, v3, v9
+; GFX10-NEXT: v_mul_f32_e32 v1, v9, v6
+; GFX10-NEXT: v_mul_f32_e32 v8, v6, v3
+; GFX10-NEXT: v_fma_f32 v3, -v6, v3, v10
; GFX10-NEXT: v_fmac_f32_e32 v5, v0, v6
; GFX10-NEXT: v_fma_f32 v0, v2, s26, -v1
-; GFX10-NEXT: v_fmac_f32_e32 v7, v3, v6
+; GFX10-NEXT: v_fmac_f32_e32 v8, v3, v6
; GFX10-NEXT: v_fmac_f32_e32 v1, v0, v6
; GFX10-NEXT: v_mul_f32_e32 v0, v2, v6
; GFX10-NEXT: s_waitcnt vmcnt(0)
-; GFX10-NEXT: v_add_f32_e32 v4, v4, v10
+; GFX10-NEXT: v_add_f32_e32 v4, v4, v11
; GFX10-NEXT: v_mul_f32_e32 v3, v4, v6
-; GFX10-NEXT: v_fmaak_f32 v4, s0, v5, 0x3ca3d70a
+; GFX10-NEXT: v_fmamk_f32 v4, v5, 0x3c23d70a, v7
; GFX10-NEXT: v_mul_f32_e32 v1, v3, v1
-; GFX10-NEXT: v_mul_f32_e32 v2, v7, v4
+; GFX10-NEXT: v_mul_f32_e32 v2, v8, v4
; GFX10-NEXT: v_fmac_f32_e32 v1, v2, v0
; GFX10-NEXT: v_max_f32_e32 v0, 0, v1
; GFX10-NEXT: ; return to shader part epilog
@@ -71,7 +71,7 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX11-LABEL: _amdgpu_ps_main:
; GFX11: ; %bb.0: ; %.entry
; GFX11-NEXT: image_sample v[0:1], v[0:1], s[0:7], s[0:3] dmask:0x3 dim:SQ_RSRC_IMG_2D
-; GFX11-NEXT: v_mov_b32_e32 v4, 0
+; GFX11-NEXT: v_dual_mov_b32 v4, 0 :: v_dual_mov_b32 v7, 0x3ca3d70a
; GFX11-NEXT: s_waitcnt vmcnt(0)
; GFX11-NEXT: s_clause 0x1
; GFX11-NEXT: image_sample v2, v[0:1], s[0:7], s[0:3] dmask:0x4 dim:SQ_RSRC_IMG_2D
@@ -96,43 +96,40 @@ define amdgpu_ps float @_amdgpu_ps_main() #0 {
; GFX11-NEXT: s_buffer_load_b128 s[20:23], s[0:3], 0x70
; GFX11-NEXT: v_fma_f32 v1, v1, v5, s28
; GFX11-NEXT: v_max_f32_e64 v6, s0, s0 clamp
-; GFX11-NEXT: s_buffer_load_b128 s[24:27], s[0:3], 0x10
; GFX11-NEXT: v_add_f32_e64 v5, s29, -1.0
+; GFX11-NEXT: s_buffer_load_b128 s[24:27], s[0:3], 0x10
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_sub_f32_e32 v8, s0, v1
-; GFX11-NEXT: v_fma_f32 v7, -s2, v6, s6
-; GFX11-NEXT: v_fma_f32 v10, s2, v6, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_4)
+; GFX11-NEXT: v_sub_f32_e32 v9, s0, v1
+; GFX11-NEXT: v_fma_f32 v8, -s2, v6, s6
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3)
; GFX11-NEXT: v_fma_f32 v5, v6, v5, 1.0
-; GFX11-NEXT: s_mov_b32 s0, 0x3c23d70a
+; GFX11-NEXT: v_fma_f32 v11, s2, v6, v2
; GFX11-NEXT: s_waitcnt lgkmcnt(0)
-; GFX11-NEXT: v_mul_f32_e32 v9, s10, v0
+; GFX11-NEXT: v_mul_f32_e32 v10, s10, v0
; GFX11-NEXT: v_fma_f32 v0, -v0, s10, s14
-; GFX11-NEXT: v_mul_f32_e32 v3, s22, v3
-; GFX11-NEXT: v_dual_fmac_f32 v1, v6, v8 :: v_dual_mul_f32 v8, s18, v2
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_fmac_f32_e32 v9, v0, v6
-; GFX11-NEXT: v_dual_fmac_f32 v10, v7, v6 :: v_dual_mul_f32 v7, v6, v3
+; GFX11-NEXT: v_fmac_f32_e32 v1, v6, v9
+; GFX11-NEXT: v_mul_f32_e32 v9, s18, v2
; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_fmac_f32_e32 v10, v0, v6
; GFX11-NEXT: v_sub_f32_e32 v0, v1, v5
-; GFX11-NEXT: v_fma_f32 v3, -v6, v3, v9
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_fmac_f32_e32 v7, v3, v6
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v5, v0, v6
-; GFX11-NEXT: v_mul_f32_e32 v1, v8, v6
+; GFX11-NEXT: v_mul_f32_e32 v3, s22, v3
+; GFX11-NEXT: v_dual_fmac_f32 v11, v8, v6 :: v_dual_mul_f32 v8, v6, v3
+; GFX11-NEXT: v_mul_f32_e32 v1, v9, v6
+; GFX11-NEXT: v_fma_f32 v3, -v6, v3, v10
; GFX11-NEXT: s_waitcnt vmcnt(0)
-; GFX11-NEXT: v_add_f32_e32 v4, v4, v10
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_3)
-; GFX11-NEXT: v_dual_mul_f32 v3, v4, v6 :: v_dual_fmaak_f32 v4, s0, v5, 0x3ca3d70a
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_3)
+; GFX11-NEXT: v_add_f32_e32 v4, v4, v11
; GFX11-NEXT: v_fma_f32 v0, v2, s26, -v1
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_4)
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_3) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v1, v0, v6
; GFX11-NEXT: v_mul_f32_e32 v0, v2, v6
-; GFX11-NEXT: v_mul_f32_e32 v2, v7, v4
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_3) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT: v_mul_f32_e32 v1, v3, v1
+; GFX11-NEXT: v_fmac_f32_e32 v8, v3, v6
+; GFX11-NEXT: v_dual_mul_f32 v3, v4, v6 :: v_dual_fmamk_f32 v4, v5, 0x3c23d70a, v7
+; GFX11-NEXT: v_dual_mul_f32 v1, v3, v1 :: v_dual_mul_f32 v2, v8, v4
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX11-NEXT: v_fmac_f32_e32 v1, v2, v0
-; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX11-NEXT: v_max_f32_e32 v0, 0, v1
; GFX11-NEXT: ; return to shader part epilog
.entry:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
index 226670a550014ca..d4ad53291070d78 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.fmad.ftz.f16.ll
@@ -33,7 +33,7 @@ define amdgpu_kernel void @mad_f16_imm_a(
}
; GCN-LABEL: {{^}}mad_f16_imm_b:
-; GCN: v_mac_f16_e32 {{v[0-9]+}}, 0x4800, {{v[0-9]+$}}
+; GCN: v_madmk_f16 {{v[0-9]+}}, {{v[0-9]+}}, 0x4800, {{v[0-9]+$}}
define amdgpu_kernel void @mad_f16_imm_b(
ptr addrspace(1) %r,
ptr addrspace(1) %a,
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
index f90d338ffc48736..1926d0f8f0d6fc9 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.fmuladd.f16.ll
@@ -256,8 +256,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_mac_f32_e32 v1, 0x40400000, v0
-; SI-NEXT: v_cvt_f16_f32_e32 v0, v1
+; SI-NEXT: v_madmk_f32 v0, v0, 0x40400000, v1
+; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
@@ -280,8 +280,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
; VI-FLUSH-NEXT: s_waitcnt vmcnt(0)
; VI-FLUSH-NEXT: s_mov_b32 s0, s4
; VI-FLUSH-NEXT: s_mov_b32 s1, s5
-; VI-FLUSH-NEXT: v_mac_f16_e32 v1, 0x4200, v0
-; VI-FLUSH-NEXT: buffer_store_short v1, off, s[0:3], 0
+; VI-FLUSH-NEXT: v_madmk_f16 v0, v0, 0x4200, v1
+; VI-FLUSH-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-FLUSH-NEXT: s_endpgm
;
; VI-DENORM-LABEL: fmuladd_f16_imm_a:
@@ -353,8 +353,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_a(
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
; GFX10-DENORM-NEXT: s_mov_b32 s0, s4
; GFX10-DENORM-NEXT: s_mov_b32 s1, s5
-; GFX10-DENORM-NEXT: v_fmac_f16_e32 v1, 0x4200, v0
-; GFX10-DENORM-NEXT: buffer_store_short v1, off, s[0:3], 0
+; GFX10-DENORM-NEXT: v_fmamk_f16 v0, v0, 0x4200, v1
+; GFX10-DENORM-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX10-DENORM-NEXT: s_endpgm
;
; GFX11-FLUSH-LABEL: fmuladd_f16_imm_a:
@@ -442,8 +442,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
; SI-NEXT: s_mov_b32 s1, s5
; SI-NEXT: v_cvt_f32_f16_e32 v0, v0
; SI-NEXT: v_cvt_f32_f16_e32 v1, v1
-; SI-NEXT: v_mac_f32_e32 v1, 0x40400000, v0
-; SI-NEXT: v_cvt_f16_f32_e32 v0, v1
+; SI-NEXT: v_madmk_f32 v0, v0, 0x40400000, v1
+; SI-NEXT: v_cvt_f16_f32_e32 v0, v0
; SI-NEXT: buffer_store_short v0, off, s[0:3], 0
; SI-NEXT: s_endpgm
;
@@ -466,8 +466,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
; VI-FLUSH-NEXT: s_waitcnt vmcnt(0)
; VI-FLUSH-NEXT: s_mov_b32 s0, s4
; VI-FLUSH-NEXT: s_mov_b32 s1, s5
-; VI-FLUSH-NEXT: v_mac_f16_e32 v1, 0x4200, v0
-; VI-FLUSH-NEXT: buffer_store_short v1, off, s[0:3], 0
+; VI-FLUSH-NEXT: v_madmk_f16 v0, v0, 0x4200, v1
+; VI-FLUSH-NEXT: buffer_store_short v0, off, s[0:3], 0
; VI-FLUSH-NEXT: s_endpgm
;
; VI-DENORM-LABEL: fmuladd_f16_imm_b:
@@ -539,8 +539,8 @@ define amdgpu_kernel void @fmuladd_f16_imm_b(
; GFX10-DENORM-NEXT: s_waitcnt vmcnt(0)
; GFX10-DENORM-NEXT: s_mov_b32 s0, s4
; GFX10-DENORM-NEXT: s_mov_b32 s1, s5
-; GFX10-DENORM-NEXT: v_fmac_f16_e32 v1, 0x4200, v0
-; GFX10-DENORM-NEXT: buffer_store_short v1, off, s[0:3], 0
+; GFX10-DENORM-NEXT: v_fmamk_f16 v0, v0, 0x4200, v1
+; GFX10-DENORM-NEXT: buffer_store_short v0, off, s[0:3], 0
; GFX10-DENORM-NEXT: s_endpgm
;
; GFX11-FLUSH-LABEL: fmuladd_f16_imm_b:
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index f55242a8726beb1..528232a203acfe1 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -203,7 +203,7 @@ define amdgpu_kernel void @s_log_f32(ptr addrspace(1) %out, float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2269,7 +2269,7 @@ define float @v_log_f32(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -2472,7 +2472,7 @@ define float @v_log_fabs_f32(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2675,7 +2675,7 @@ define float @v_log_fneg_fabs_f32(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2878,7 +2878,7 @@ define float @v_log_fneg_f32(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -3015,9 +3015,7 @@ define float @v_log_f32_fast(float %in) {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_fast:
@@ -3135,9 +3133,7 @@ define float @v_log_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_unsafe_math_attr:
@@ -3255,9 +3251,7 @@ define float @v_log_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true" {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_approx_fn_attr:
@@ -3441,7 +3435,7 @@ define float @v_log_f32_ninf(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -3577,9 +3571,7 @@ define float @v_log_f32_afn(float %in) {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_afn:
@@ -3726,9 +3718,7 @@ define float @v_log_f32_afn_dynamic(float %in) #1 {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_afn_dynamic:
@@ -3844,11 +3834,10 @@ define float @v_fabs_log_f32_afn(float %in) {
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc1b17218, s0
; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3f317218, v0
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3f317218, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_fabs_log_f32_afn:
@@ -3982,7 +3971,7 @@ define float @v_log_f32_daz(float %in) #0 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4174,7 +4163,7 @@ define float @v_log_f32_nnan(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -4324,7 +4313,7 @@ define float @v_log_f32_nnan_daz(float %in) #0 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4516,7 +4505,7 @@ define float @v_log_f32_nnan_dynamic(float %in) #1 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -4666,7 +4655,7 @@ define float @v_log_f32_ninf_daz(float %in) #0 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4858,7 +4847,7 @@ define float @v_log_f32_ninf_dynamic(float %in) #1 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -5041,9 +5030,9 @@ define float @v_log_f32_nnan_ninf(float %in) {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -5152,9 +5141,9 @@ define float @v_log_f32_nnan_ninf_daz(float %in) #0 {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log_f32_nnan_ninf_daz:
@@ -5322,9 +5311,9 @@ define float @v_log_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3f317217, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x41b17218, vcc_lo
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -5548,7 +5537,7 @@ define float @v_log_f32_dynamic_mode(float %in) #1 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -5722,7 +5711,7 @@ define float @v_log_f32_undef() {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -5887,7 +5876,7 @@ define float @v_log_f32_0() {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6041,7 +6030,7 @@ define float @v_log_f32_from_fpext_f16(i16 %src.i) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6212,7 +6201,7 @@ define float @v_log_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6390,7 +6379,7 @@ define float @v_log_f32_from_fpext_bf16(bfloat %src) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3f317217, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3377d1cf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3377d1cf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index ab6325216c06d9f..2e5bf2e5609512b 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -203,7 +203,7 @@ define amdgpu_kernel void @s_log10_f32(ptr addrspace(1) %out, float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: v_dual_add_f32 v1, v1, v2 :: v_dual_mov_b32 v2, 0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2269,7 +2269,7 @@ define float @v_log10_f32(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -2472,7 +2472,7 @@ define float @v_log10_fabs_f32(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2675,7 +2675,7 @@ define float @v_log10_fneg_fabs_f32(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -2878,7 +2878,7 @@ define float @v_log10_fneg_f32(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -3015,9 +3015,7 @@ define float @v_log10_f32_fast(float %in) {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_fast:
@@ -3135,9 +3133,7 @@ define float @v_log10_f32_unsafe_math_attr(float %in) "unsafe-fp-math"="true" {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_unsafe_math_attr:
@@ -3255,9 +3251,7 @@ define float @v_log10_f32_approx_fn_attr(float %in) "approx-func-fp-math"="true"
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_approx_fn_attr:
@@ -3441,7 +3435,7 @@ define float @v_log10_f32_ninf(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -3577,9 +3571,7 @@ define float @v_log10_f32_afn(float %in) {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_afn:
@@ -3726,9 +3718,7 @@ define float @v_log10_f32_afn_dynamic(float %in) #1 {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v0, v0, v2
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_afn_dynamic:
@@ -3844,11 +3834,10 @@ define float @v_fabs_log10_f32_afn(float %in) {
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v2, 1.0, 0x4f800000, s0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0xc11a209b, s0
; GFX1100-SDAG-NEXT: v_mul_f32_e64 v0, |v0|, v2
-; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
+; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_log_f32_e32 v0, v0
; GFX1100-SDAG-NEXT: s_waitcnt_depctr 0xfff
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v1, 0x3e9a209b, v0
-; GFX1100-SDAG-NEXT: v_mov_b32_e32 v0, v1
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3e9a209b, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_fabs_log10_f32_afn:
@@ -3982,7 +3971,7 @@ define float @v_log10_f32_daz(float %in) #0 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4174,7 +4163,7 @@ define float @v_log10_f32_nnan(float %in) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -4324,7 +4313,7 @@ define float @v_log10_f32_nnan_daz(float %in) #0 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4516,7 +4505,7 @@ define float @v_log10_f32_nnan_dynamic(float %in) #1 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -4666,7 +4655,7 @@ define float @v_log10_f32_ninf_daz(float %in) #0 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -4858,7 +4847,7 @@ define float @v_log10_f32_ninf_dynamic(float %in) #1 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -5041,9 +5030,9 @@ define float @v_log10_f32_nnan_ninf(float %in) {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -5152,9 +5141,9 @@ define float @v_log10_f32_nnan_ninf_daz(float %in) #0 {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
;
; GFX1100-GISEL-LABEL: v_log10_f32_nnan_ninf_daz:
@@ -5322,9 +5311,9 @@ define float @v_log10_f32_nnan_ninf_dynamic(float %in) #1 {
; GFX1100-SDAG-NEXT: v_mul_f32_e32 v1, 0x3e9a209a, v0
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v0, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_1) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v2
+; GFX1100-SDAG-NEXT: v_add_f32_e32 v0, v1, v0
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v1, 0, 0x411a209b, vcc_lo
; GFX1100-SDAG-NEXT: v_sub_f32_e32 v0, v0, v1
; GFX1100-SDAG-NEXT: s_setpc_b64 s[30:31]
@@ -5548,7 +5537,7 @@ define float @v_log10_f32_dynamic_mode(float %in) #1 {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
@@ -5722,7 +5711,7 @@ define float @v_log10_f32_undef() {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -5887,7 +5876,7 @@ define float @v_log10_f32_0() {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6041,7 +6030,7 @@ define float @v_log10_f32_from_fpext_f16(i16 %src.i) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6212,7 +6201,7 @@ define float @v_log10_f32_from_fpext_math_f16(i16 %src0.i, i16 %src1.i) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 vcc_lo, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e32 v0, v0, v1, vcc_lo
@@ -6390,7 +6379,7 @@ define float @v_log10_f32_from_fpext_bf16(bfloat %src) {
; GFX1100-SDAG-NEXT: v_cmp_gt_f32_e64 s0, 0x7f800000, |v0|
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_fma_f32 v2, 0x3e9a209a, v0, -v1
-; GFX1100-SDAG-NEXT: v_fmac_f32_e32 v2, 0x3284fbcf, v0
+; GFX1100-SDAG-NEXT: v_fmamk_f32 v2, v0, 0x3284fbcf, v2
; GFX1100-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
; GFX1100-SDAG-NEXT: v_add_f32_e32 v1, v1, v2
; GFX1100-SDAG-NEXT: v_cndmask_b32_e64 v0, v0, v1, s0
diff --git a/llvm/test/CodeGen/AMDGPU/madmk.ll b/llvm/test/CodeGen/AMDGPU/madmk.ll
index 51a0a50fbbff5c3..00e226291e68b6f 100644
--- a/llvm/test/CodeGen/AMDGPU/madmk.ll
+++ b/llvm/test/CodeGen/AMDGPU/madmk.ll
@@ -11,7 +11,7 @@ declare float @llvm.fabs.f32(float) nounwind readnone
; GCN-LABEL: {{^}}madmk_f32:
; GCN-DAG: buffer_load_dword [[VA:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 glc{{$}}
; GCN-DAG: buffer_load_dword [[VB:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
-; GCN: v_mac_f32_e32 [[VB]], 0x41200000, [[VA]]
+; GCN: v_madmk_f32 {{v[0-9]+}}, [[VA]], 0x41200000, [[VB]]
define amdgpu_kernel void @madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
@@ -96,7 +96,7 @@ define amdgpu_kernel void @s_s_madmk_f32(ptr addrspace(1) noalias %out, [8 x i32
; GCN-DAG: s_load_dword [[SREG:s[0-9]+]]
; GCN-DAG: buffer_load_dword [[VREG1:v[0-9]+]]
; GCN: v_mov_b32_e32 [[VREG2:v[0-9]+]], [[SREG]]
-; GCN: v_mac_f32_e32 [[VREG2]], 0x41200000, [[VREG1]]
+; GCN: v_madmk_f32 {{v[0-9]+}}, [[VREG1]], 0x41200000, [[VREG2]]
; GCN: s_endpgm
define amdgpu_kernel void @v_s_madmk_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in, float %b) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
@@ -171,8 +171,9 @@ define amdgpu_kernel void @no_madmk_src2_modifier_f32(ptr addrspace(1) noalias %
; GCN-LABEL: {{^}}madmk_add_inline_imm_f32:
; GCN: buffer_load_dword [[A:v[0-9]+]]
-; GCN: s_mov_b32 [[SK:s[0-9]+]], 0x41200000
-; GCN: v_mad_f32 {{v[0-9]+}}, [[A]], [[SK]], 2.0
+; GCN: v_mov_b32_e32 [[B:v[0-9]+]], 2.0
+; GCN: v_madmk_f32 {{v[0-9]+}}, [[A]], 0x41200000, [[B]]
+
define amdgpu_kernel void @madmk_add_inline_imm_f32(ptr addrspace(1) noalias %out, ptr addrspace(1) noalias %in) #0 {
%tid = tail call i32 @llvm.amdgcn.workitem.id.x() nounwind readnone
%gep.0 = getelementptr float, ptr addrspace(1) %in, i32 %tid
diff --git a/llvm/test/CodeGen/AMDGPU/operand-folding.ll b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
index a399b509014dd73..b54cc27db1d12cb 100644
--- a/llvm/test/CodeGen/AMDGPU/operand-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/operand-folding.ll
@@ -112,8 +112,8 @@ entry:
; A subregister use operand should not be tied.
; CHECK-LABEL: {{^}}no_fold_tied_subregister:
; CHECK: buffer_load_dwordx2 v[[[LO:[0-9]+]]:[[HI:[0-9]+]]]
-; CHECK: v_mac_f32_e32 v[[LO]], 0x41200000, v[[HI]]
-; CHECK: buffer_store_dword v[[LO]]
+; CHECK: v_madmk_f32 v[[RES:[0-9]+]], v[[HI]], 0x41200000, v[[LO]]
+; CHECK: buffer_store_dword v[[RES]]
define amdgpu_kernel void @no_fold_tied_subregister() #1 {
%tmp1 = load volatile <2 x float>, ptr addrspace(1) undef
%tmp2 = extractelement <2 x float> %tmp1, i32 0
diff --git a/llvm/test/CodeGen/AMDGPU/sdiv64.ll b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
index 705a2af73959065..4f2fd3f50494c94 100644
--- a/llvm/test/CodeGen/AMDGPU/sdiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/sdiv64.ll
@@ -19,14 +19,14 @@ define amdgpu_kernel void @s_test_sdiv(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_sub_u32 s4, 0, s10
; GCN-NEXT: s_subb_u32 s5, 0, s11
; GCN-NEXT: s_load_dwordx4 s[0:3], s[0:1], 0x9
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_waitcnt lgkmcnt(0)
; GCN-NEXT: s_ashr_i32 s12, s3, 31
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: s_add_u32 s2, s2, s12
@@ -247,12 +247,12 @@ define i64 @v_test_sdiv(i64 %x, i64 %y) {
; GCN-NEXT: v_cvt_f32_u32_e32 v6, v2
; GCN-NEXT: v_sub_i32_e32 v7, vcc, 0, v3
; GCN-NEXT: v_subb_u32_e32 v8, vcc, 0, v2, vcc
-; GCN-NEXT: v_mac_f32_e32 v5, 0x4f800000, v6
+; GCN-NEXT: v_madmk_f32 v5, v6, 0x4f800000, v5
; GCN-NEXT: v_rcp_f32_e32 v5, v5
; GCN-NEXT: v_mul_f32_e32 v5, 0x5f7ffffc, v5
; GCN-NEXT: v_mul_f32_e32 v6, 0x2f800000, v5
; GCN-NEXT: v_trunc_f32_e32 v6, v6
-; GCN-NEXT: v_mac_f32_e32 v5, 0xcf800000, v6
+; GCN-NEXT: v_madmk_f32 v5, v6, 0xcf800000, v5
; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
; GCN-NEXT: v_cvt_u32_f32_e32 v6, v6
; GCN-NEXT: v_mul_hi_u32 v9, v7, v5
@@ -1093,12 +1093,12 @@ define amdgpu_kernel void @s_test_sdiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3
; GCN-NEXT: s_sub_u32 s4, 0, s2
; GCN-NEXT: s_subb_u32 s5, 0, s3
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
@@ -1287,12 +1287,12 @@ define i64 @v_test_sdiv_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v4, v1
; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v6, vcc, 0, v1, vcc
-; GCN-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
+; GCN-NEXT: v_madmk_f32 v3, v4, 0x4f800000, v3
; GCN-NEXT: v_rcp_f32_e32 v3, v3
; GCN-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
; GCN-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
; GCN-NEXT: v_trunc_f32_e32 v4, v4
-; GCN-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4
+; GCN-NEXT: v_madmk_f32 v3, v4, 0xcf800000, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_mul_hi_u32 v7, v5, v3
@@ -1484,12 +1484,12 @@ define i64 @v_test_sdiv_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v4, v1
; GCN-NEXT: v_sub_i32_e32 v5, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v6, vcc, 0, v1, vcc
-; GCN-NEXT: v_mac_f32_e32 v3, 0x4f800000, v4
+; GCN-NEXT: v_madmk_f32 v3, v4, 0x4f800000, v3
; GCN-NEXT: v_rcp_f32_e32 v3, v3
; GCN-NEXT: v_mul_f32_e32 v3, 0x5f7ffffc, v3
; GCN-NEXT: v_mul_f32_e32 v4, 0x2f800000, v3
; GCN-NEXT: v_trunc_f32_e32 v4, v4
-; GCN-NEXT: v_mac_f32_e32 v3, 0xcf800000, v4
+; GCN-NEXT: v_madmk_f32 v3, v4, 0xcf800000, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_mul_hi_u32 v7, v5, v3
diff --git a/llvm/test/CodeGen/AMDGPU/srem64.ll b/llvm/test/CodeGen/AMDGPU/srem64.ll
index 613349f32e2d5ad..24319a639da4472 100644
--- a/llvm/test/CodeGen/AMDGPU/srem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/srem64.ll
@@ -15,13 +15,13 @@ define amdgpu_kernel void @s_test_srem(ptr addrspace(1) %out, i64 %x, i64 %y) {
; GCN-NEXT: s_sub_u32 s0, 0, s12
; GCN-NEXT: s_subb_u32 s1, 0, s13
; GCN-NEXT: s_mov_b32 s4, s8
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s5, s9
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s0, v1
@@ -226,12 +226,12 @@ define i64 @v_test_srem(i64 %x, i64 %y) {
; GCN-NEXT: v_cvt_f32_u32_e32 v5, v2
; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v3
; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v2, vcc
-; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GCN-NEXT: v_rcp_f32_e32 v4, v4
; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GCN-NEXT: v_trunc_f32_e32 v5, v5
-; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
+; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
; GCN-NEXT: v_mul_hi_u32 v8, v6, v4
@@ -894,7 +894,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: s_sub_u32 s0, 0, s12
; GCN-NEXT: s_subb_u32 s1, 0, s13
; GCN-NEXT: s_ashr_i32 s6, s7, 31
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s7, s6
; GCN-NEXT: s_mov_b32 s8, s4
@@ -902,7 +902,7 @@ define amdgpu_kernel void @s_test_srem33_64(ptr addrspace(1) %out, i64 %x, i64 %
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s0, v1
@@ -1290,13 +1290,13 @@ define amdgpu_kernel void @s_test_srem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: s_sub_u32 s2, 0, s8
; GCN-NEXT: s_subb_u32 s3, 0, s9
; GCN-NEXT: s_mov_b32 s4, s0
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s5, s1
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s2, v1
@@ -1481,12 +1481,12 @@ define i64 @v_test_srem_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc
-; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GCN-NEXT: v_rcp_f32_e32 v2, v2
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
-; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_mul_hi_u32 v6, v4, v2
@@ -1676,12 +1676,12 @@ define i64 @v_test_srem_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc
-; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GCN-NEXT: v_rcp_f32_e32 v2, v2
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
-; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_mul_hi_u32 v6, v4, v2
diff --git a/llvm/test/CodeGen/AMDGPU/udiv.ll b/llvm/test/CodeGen/AMDGPU/udiv.ll
index cf30131b8ab58ab..012b3f976734dec 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv.ll
@@ -2527,7 +2527,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; SI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; SI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; SI-NEXT: v_trunc_f32_e32 v3, v3
-; SI-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; SI-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; SI-NEXT: v_cvt_u32_f32_e32 v2, v2
; SI-NEXT: v_cvt_u32_f32_e32 v3, v3
; SI-NEXT: v_mul_hi_u32 v4, v2, s4
@@ -2626,7 +2626,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; VI-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; VI-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; VI-NEXT: v_trunc_f32_e32 v3, v3
-; VI-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; VI-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; VI-NEXT: v_cvt_u32_f32_e32 v6, v2
; VI-NEXT: v_cvt_u32_f32_e32 v7, v3
; VI-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
@@ -2713,7 +2713,7 @@ define i64 @v_test_udiv64_mulhi_fold(i64 %arg) {
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
-; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v6, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v7, v3
; GCN-NEXT: v_mad_u64_u32 v[2:3], s[4:5], v6, s6, 0
diff --git a/llvm/test/CodeGen/AMDGPU/udiv64.ll b/llvm/test/CodeGen/AMDGPU/udiv64.ll
index c5ab44e31c0320d..e23f3cfad89bc88 100644
--- a/llvm/test/CodeGen/AMDGPU/udiv64.ll
+++ b/llvm/test/CodeGen/AMDGPU/udiv64.ll
@@ -14,12 +14,12 @@ define amdgpu_kernel void @s_test_udiv_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s9
; GCN-NEXT: s_sub_u32 s4, 0, s8
; GCN-NEXT: s_subb_u32 s5, 0, s9
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
@@ -211,12 +211,12 @@ define i64 @v_test_udiv_i64(i64 %x, i64 %y) {
; GCN-NEXT: v_cvt_f32_u32_e32 v5, v3
; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
-; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GCN-NEXT: v_rcp_f32_e32 v4, v4
; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GCN-NEXT: v_trunc_f32_e32 v5, v5
-; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
+; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_mul_lo_u32 v8, v6, v5
@@ -688,7 +688,7 @@ define amdgpu_kernel void @s_test_udiv24_i48(ptr addrspace(1) %out, i48 %x, i48
; GCN-NEXT: v_mul_f32_e32 v1, 0x5f7ffffc, v1
; GCN-NEXT: v_mul_f32_e32 v2, 0x2f800000, v1
; GCN-NEXT: v_trunc_f32_e32 v2, v2
-; GCN-NEXT: v_mac_f32_e32 v1, 0xcf800000, v2
+; GCN-NEXT: v_madmk_f32 v1, v2, 0xcf800000, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: s_mov_b32 s2, -1
@@ -886,12 +886,12 @@ define amdgpu_kernel void @s_test_udiv_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_cvt_f32_u32_e32 v1, s3
; GCN-NEXT: s_sub_u32 s4, 0, s2
; GCN-NEXT: s_subb_u32 s5, 0, s3
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s4, v1
@@ -1067,12 +1067,12 @@ define i64 @v_test_udiv_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc
-; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GCN-NEXT: v_rcp_f32_e32 v2, v2
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
-; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_mul_lo_u32 v6, v4, v3
@@ -1335,7 +1335,7 @@ define amdgpu_kernel void @s_test_udiv_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: s_mov_b32 s2, -1
@@ -1509,7 +1509,7 @@ define i64 @v_test_udiv_k_den_i64(i64 %x) {
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
-; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_mul_hi_u32 v4, v2, s4
diff --git a/llvm/test/CodeGen/AMDGPU/urem64.ll b/llvm/test/CodeGen/AMDGPU/urem64.ll
index 894c96acbbcd6b1..f68d14a32b929a5 100644
--- a/llvm/test/CodeGen/AMDGPU/urem64.ll
+++ b/llvm/test/CodeGen/AMDGPU/urem64.ll
@@ -15,13 +15,13 @@ define amdgpu_kernel void @s_test_urem_i64(ptr addrspace(1) %out, i64 %x, i64 %y
; GCN-NEXT: s_sub_u32 s0, 0, s12
; GCN-NEXT: s_subb_u32 s1, 0, s13
; GCN-NEXT: s_mov_b32 s4, s8
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s5, s9
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s0, v1
@@ -221,12 +221,12 @@ define i64 @v_test_urem_i64(i64 %x, i64 %y) {
; GCN-NEXT: v_cvt_f32_u32_e32 v5, v3
; GCN-NEXT: v_sub_i32_e32 v6, vcc, 0, v2
; GCN-NEXT: v_subb_u32_e32 v7, vcc, 0, v3, vcc
-; GCN-NEXT: v_mac_f32_e32 v4, 0x4f800000, v5
+; GCN-NEXT: v_madmk_f32 v4, v5, 0x4f800000, v4
; GCN-NEXT: v_rcp_f32_e32 v4, v4
; GCN-NEXT: v_mul_f32_e32 v4, 0x5f7ffffc, v4
; GCN-NEXT: v_mul_f32_e32 v5, 0x2f800000, v4
; GCN-NEXT: v_trunc_f32_e32 v5, v5
-; GCN-NEXT: v_mac_f32_e32 v4, 0xcf800000, v5
+; GCN-NEXT: v_madmk_f32 v4, v5, 0xcf800000, v4
; GCN-NEXT: v_cvt_u32_f32_e32 v5, v5
; GCN-NEXT: v_cvt_u32_f32_e32 v4, v4
; GCN-NEXT: v_mul_lo_u32 v8, v6, v5
@@ -716,13 +716,13 @@ define amdgpu_kernel void @s_test_urem_k_num_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: s_sub_u32 s0, 0, s6
; GCN-NEXT: s_subb_u32 s1, 0, s7
; GCN-NEXT: s_mov_b32 s8, s4
-; GCN-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GCN-NEXT: v_rcp_f32_e32 v0, v0
; GCN-NEXT: s_mov_b32 s9, s5
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_mul_lo_u32 v2, s0, v1
@@ -903,7 +903,7 @@ define amdgpu_kernel void @s_test_urem_k_den_i64(ptr addrspace(1) %out, i64 %x)
; GCN-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GCN-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GCN-NEXT: v_trunc_f32_e32 v1, v1
-; GCN-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GCN-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v0, v0
; GCN-NEXT: v_cvt_u32_f32_e32 v1, v1
; GCN-NEXT: s_waitcnt lgkmcnt(0)
@@ -1086,12 +1086,12 @@ define i64 @v_test_urem_pow2_k_num_i64(i64 %x) {
; GCN-NEXT: v_cvt_f32_u32_e32 v3, v1
; GCN-NEXT: v_sub_i32_e32 v4, vcc, 0, v0
; GCN-NEXT: v_subb_u32_e32 v5, vcc, 0, v1, vcc
-; GCN-NEXT: v_mac_f32_e32 v2, 0x4f800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0x4f800000, v2
; GCN-NEXT: v_rcp_f32_e32 v2, v2
; GCN-NEXT: v_mul_f32_e32 v2, 0x5f7ffffc, v2
; GCN-NEXT: v_mul_f32_e32 v3, 0x2f800000, v2
; GCN-NEXT: v_trunc_f32_e32 v3, v3
-; GCN-NEXT: v_mac_f32_e32 v2, 0xcf800000, v3
+; GCN-NEXT: v_madmk_f32 v2, v3, 0xcf800000, v2
; GCN-NEXT: v_cvt_u32_f32_e32 v3, v3
; GCN-NEXT: v_cvt_u32_f32_e32 v2, v2
; GCN-NEXT: v_mul_lo_u32 v6, v4, v3
diff --git a/llvm/test/CodeGen/AMDGPU/v_mac.ll b/llvm/test/CodeGen/AMDGPU/v_mac.ll
index 3df7f3c26aad82e..2b5762e1fa2a52f 100644
--- a/llvm/test/CodeGen/AMDGPU/v_mac.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_mac.ll
@@ -253,7 +253,7 @@ bb:
; SI: v_add_f32_e32 [[TMP2:v[0-9]+]], [[CVT_A]], [[CVT_A]]
; SI: v_mad_f32 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
-; SI: v_mac_f32_e32 v{{[0-9]+}}, 0x41000000, v{{[0-9]+}}
+; SI: v_madmk_f32 v{{[0-9]+}}, v{{[0-9]+}}, 0x41000000, v{{[0-9]+}}
; VI-FLUSH: v_add_f16_e32 [[TMP2:v[0-9]+]], [[A]], [[A]]
; VI-FLUSH: v_mad_f16 v{{[0-9]+}}, [[TMP2]], -4.0, 1.0
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index 94b822ac4887577..cadc23414dcac11 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -743,12 +743,12 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
; GFX1032-NEXT: v_cvt_f32_u32_e32 v1, s5
; GFX1032-NEXT: s_sub_u32 s9, 0, s4
; GFX1032-NEXT: s_subb_u32 s10, 0, s5
-; GFX1032-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX1032-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX1032-NEXT: v_rcp_f32_e32 v0, v0
; GFX1032-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1032-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1032-NEXT: v_trunc_f32_e32 v1, v1
-; GFX1032-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX1032-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX1032-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1032-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1032-NEXT: v_readfirstlane_b32 s0, v1
@@ -905,12 +905,12 @@ define amdgpu_kernel void @test_udiv64(ptr addrspace(1) %arg) #0 {
; GFX1064-NEXT: v_cvt_f32_u32_e32 v1, s5
; GFX1064-NEXT: s_sub_u32 s9, 0, s4
; GFX1064-NEXT: s_subb_u32 s10, 0, s5
-; GFX1064-NEXT: v_mac_f32_e32 v0, 0x4f800000, v1
+; GFX1064-NEXT: v_madmk_f32 v0, v1, 0x4f800000, v0
; GFX1064-NEXT: v_rcp_f32_e32 v0, v0
; GFX1064-NEXT: v_mul_f32_e32 v0, 0x5f7ffffc, v0
; GFX1064-NEXT: v_mul_f32_e32 v1, 0x2f800000, v0
; GFX1064-NEXT: v_trunc_f32_e32 v1, v1
-; GFX1064-NEXT: v_mac_f32_e32 v0, 0xcf800000, v1
+; GFX1064-NEXT: v_madmk_f32 v0, v1, 0xcf800000, v0
; GFX1064-NEXT: v_cvt_u32_f32_e32 v1, v1
; GFX1064-NEXT: v_cvt_u32_f32_e32 v0, v0
; GFX1064-NEXT: v_readfirstlane_b32 s8, v1
More information about the llvm-commits
mailing list