[llvm] [PeepholeOptimizer] Recognize new move-immediate instructions (PR #72128)

via llvm-commits llvm-commits at lists.llvm.org
Mon Nov 13 07:38:00 PST 2023


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-amdgpu

Author: Jay Foad (jayfoad)

<details>
<summary>Changes</summary>

Folding a move-immediate into another move can itself create a new
move-immediate. Recognize these newly created move-immediates and use
them as sources for further folding.

For the AMDGPU target this happens with sequences like:

  s_mov_b32 s0, 12345
  v_mov_b32 v0, s0

The second instruction will be folded to:

  v_mov_b32 v0, 12345

With this patch, the immediate value 12345 can then be folded into
further uses of v0.


---

Patch is 146.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/72128.diff


10 Files Affected:

- (modified) llvm/lib/CodeGen/PeepholeOptimizer.cpp (+1) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll (+18-18) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll (+18-18) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll (+19-19) 
- (modified) llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll (+21-21) 
- (modified) llvm/test/CodeGen/AMDGPU/fma.f16.ll (+3-3) 
- (modified) llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll (+27-6) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log.ll (+281-566) 
- (modified) llvm/test/CodeGen/AMDGPU/llvm.log10.ll (+281-566) 
- (modified) llvm/test/CodeGen/AMDGPU/mad-mix.ll (+20-41) 


``````````diff
diff --git a/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
index 76b3b16af16bdc7..5914450162c8e18 100644
--- a/llvm/lib/CodeGen/PeepholeOptimizer.cpp
+++ b/llvm/lib/CodeGen/PeepholeOptimizer.cpp
@@ -1811,6 +1811,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
             LocalMIs.erase(MI);
             continue;
           }
+          isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs);
         }
       }
 
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
index 3eb6f1eced0957f..7d3e0208cdb9dbe 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/sdiv.i64.ll
@@ -33,12 +33,12 @@ define i64 @v_sdiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v1
 ; CHECK-NEXT:    v_sub_i32_e32 v10, vcc, 0, v2
 ; CHECK-NEXT:    v_subb_u32_e32 v11, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v6
+; CHECK-NEXT:    v_madmk_f32 v3, v6, 0x4f800000, v3
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
 ; CHECK-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v3
 ; CHECK-NEXT:    v_trunc_f32_e32 v8, v6
-; CHECK-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v8
+; CHECK-NEXT:    v_madmk_f32 v3, v8, 0xcf800000, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v9, v3
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v12, v8
 ; CHECK-NEXT:    v_mad_u64_u32 v[6:7], s[4:5], v10, v9, 0
@@ -215,13 +215,13 @@ define amdgpu_ps i64 @s_sdiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    s_mov_b32 s7, s6
 ; CHECK-NEXT:    s_xor_b64 s[12:13], s[0:1], s[6:7]
 ; CHECK-NEXT:    s_sub_u32 s3, 0, s10
-; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CHECK-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; CHECK-NEXT:    s_subb_u32 s5, 0, s11
 ; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; CHECK-NEXT:    v_trunc_f32_e32 v2, v1
-; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CHECK-NEXT:    v_madmk_f32 v0, v2, 0xcf800000, v0
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v2
 ; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
@@ -669,12 +669,12 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v4, v1
 ; CGP-NEXT:    v_sub_i32_e32 v13, vcc, 0, v2
 ; CGP-NEXT:    v_subb_u32_e32 v14, vcc, 0, v1, vcc
-; CGP-NEXT:    v_mac_f32_e32 v3, 0x4f800000, v4
+; CGP-NEXT:    v_madmk_f32 v3, v4, 0x4f800000, v3
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v3, v3
 ; CGP-NEXT:    v_mul_f32_e32 v3, 0x5f7ffffc, v3
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v3
 ; CGP-NEXT:    v_trunc_f32_e32 v5, v4
-; CGP-NEXT:    v_mac_f32_e32 v3, 0xcf800000, v5
+; CGP-NEXT:    v_madmk_f32 v3, v5, 0xcf800000, v3
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v3
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v15, v5
 ; CGP-NEXT:    v_mad_u64_u32 v[3:4], s[4:5], v13, v12, 0
@@ -842,12 +842,12 @@ define <2 x i64> @v_sdiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
 ; CGP-NEXT:    v_sub_i32_e32 v11, vcc, 0, v4
 ; CGP-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
+; CGP-NEXT:    v_madmk_f32 v5, v6, 0x4f800000, v5
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
 ; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
 ; CGP-NEXT:    v_trunc_f32_e32 v7, v6
-; CGP-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; CGP-NEXT:    v_madmk_f32 v5, v7, 0xcf800000, v5
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v10, v5
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v7
 ; CGP-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v11, v10, 0
@@ -1002,12 +1002,12 @@ define i64 @v_sdiv_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v6, 0xfffff000
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v4, v3
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CHECK-NEXT:    v_madmk_f32 v2, v4, 0xcf800000, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v4
 ; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
@@ -1607,12 +1607,12 @@ define i64 @v_sdiv_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v6, 0xffed2705
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v4, v3
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CHECK-NEXT:    v_madmk_f32 v2, v4, 0xcf800000, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v4
 ; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
@@ -2237,12 +2237,12 @@ define i64 @v_sdiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v6, v1
 ; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v2
 ; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v5, 0x4f800000, v6
+; CHECK-NEXT:    v_madmk_f32 v5, v6, 0x4f800000, v5
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v5, v5
 ; CHECK-NEXT:    v_mul_f32_e32 v5, 0x5f7ffffc, v5
 ; CHECK-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v5
 ; CHECK-NEXT:    v_trunc_f32_e32 v7, v6
-; CHECK-NEXT:    v_mac_f32_e32 v5, 0xcf800000, v7
+; CHECK-NEXT:    v_madmk_f32 v5, v7, 0xcf800000, v5
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v5
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v7
 ; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
@@ -2693,12 +2693,12 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v11, v1
 ; CGP-NEXT:    v_sub_i32_e32 v14, vcc, 0, v4
 ; CGP-NEXT:    v_subb_u32_e32 v15, vcc, 0, v1, vcc
-; CGP-NEXT:    v_mac_f32_e32 v10, 0x4f800000, v11
+; CGP-NEXT:    v_madmk_f32 v10, v11, 0x4f800000, v10
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v10, v10
 ; CGP-NEXT:    v_mul_f32_e32 v10, 0x5f7ffffc, v10
 ; CGP-NEXT:    v_mul_f32_e32 v11, 0x2f800000, v10
 ; CGP-NEXT:    v_trunc_f32_e32 v12, v11
-; CGP-NEXT:    v_mac_f32_e32 v10, 0xcf800000, v12
+; CGP-NEXT:    v_madmk_f32 v10, v12, 0xcf800000, v10
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v10
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v16, v12
 ; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
@@ -2868,12 +2868,12 @@ define <2 x i64> @v_sdiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v8, v3
 ; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v4
 ; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v8
+; CGP-NEXT:    v_madmk_f32 v6, v8, 0x4f800000, v6
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v6, v6
 ; CGP-NEXT:    v_mul_f32_e32 v6, 0x5f7ffffc, v6
 ; CGP-NEXT:    v_mul_f32_e32 v8, 0x2f800000, v6
 ; CGP-NEXT:    v_trunc_f32_e32 v10, v8
-; CGP-NEXT:    v_mac_f32_e32 v6, 0xcf800000, v10
+; CGP-NEXT:    v_madmk_f32 v6, v10, 0xcf800000, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v11, v6
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v14, v10
 ; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
index 0b22b3b3a4ba7c6..fc904cda5d279ce 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/srem.i64.ll
@@ -33,12 +33,12 @@ define i64 @v_srem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v0
 ; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v6, v3
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v6
+; CHECK-NEXT:    v_madmk_f32 v2, v6, 0xcf800000, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v6
 ; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v9, v8, 0
@@ -209,13 +209,13 @@ define amdgpu_ps i64 @s_srem_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    s_mov_b32 s7, s6
 ; CHECK-NEXT:    s_xor_b64 s[10:11], s[10:11], s[6:7]
 ; CHECK-NEXT:    s_sub_u32 s3, 0, s8
-; CHECK-NEXT:    v_mac_f32_e32 v0, 0x4f800000, v1
+; CHECK-NEXT:    v_madmk_f32 v0, v1, 0x4f800000, v0
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; CHECK-NEXT:    s_subb_u32 s5, 0, s9
 ; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v1, 0x2f800000, v0
 ; CHECK-NEXT:    v_trunc_f32_e32 v2, v1
-; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CHECK-NEXT:    v_madmk_f32 v0, v2, 0xcf800000, v0
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v3, v0
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v2
 ; CHECK-NEXT:    v_mad_u64_u32 v[0:1], s[0:1], s3, v3, 0
@@ -655,12 +655,12 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v3, v1
 ; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v0
 ; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v1, vcc
-; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CGP-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CGP-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CGP-NEXT:    v_trunc_f32_e32 v4, v3
-; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CGP-NEXT:    v_madmk_f32 v2, v4, 0xcf800000, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v5, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v14, v4
 ; CGP-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v12, v5, 0
@@ -824,12 +824,12 @@ define <2 x i64> @v_srem_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v5, v3
 ; CGP-NEXT:    v_sub_i32_e32 v10, vcc, 0, v2
 ; CGP-NEXT:    v_subb_u32_e32 v11, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v5
+; CGP-NEXT:    v_madmk_f32 v4, v5, 0x4f800000, v4
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v4
 ; CGP-NEXT:    v_trunc_f32_e32 v6, v5
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; CGP-NEXT:    v_madmk_f32 v4, v6, 0xcf800000, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v7, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v12, v6
 ; CGP-NEXT:    v_mad_u64_u32 v[4:5], s[4:5], v10, v7, 0
@@ -980,12 +980,12 @@ define i64 @v_srem_i64_pow2k_denom(i64 %num) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x1000
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v6, 0xfffff000
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v4, v3
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CHECK-NEXT:    v_madmk_f32 v2, v4, 0xcf800000, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v4
 ; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
@@ -1575,12 +1575,12 @@ define i64 @v_srem_i64_oddk_denom(i64 %num) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v2, 0x12d8fb
 ; CHECK-NEXT:    v_cvt_f32_ubyte0_e32 v3, 0
 ; CHECK-NEXT:    v_mov_b32_e32 v6, 0xffed2705
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v3
+; CHECK-NEXT:    v_madmk_f32 v2, v3, 0x4f800000, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v3, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v4, v3
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CHECK-NEXT:    v_madmk_f32 v2, v4, 0xcf800000, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v5, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v7, v4
 ; CHECK-NEXT:    v_mad_u64_u32 v[2:3], s[4:5], v6, v5, 0
@@ -2195,12 +2195,12 @@ define i64 @v_srem_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v5, v1
 ; CHECK-NEXT:    v_sub_i32_e32 v9, vcc, 0, v0
 ; CHECK-NEXT:    v_subb_u32_e32 v10, vcc, 0, v1, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v5
+; CHECK-NEXT:    v_madmk_f32 v2, v5, 0x4f800000, v2
 ; CHECK-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CHECK-NEXT:    v_mul_f32_e32 v5, 0x2f800000, v2
 ; CHECK-NEXT:    v_trunc_f32_e32 v7, v5
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v7
+; CHECK-NEXT:    v_madmk_f32 v2, v7, 0xcf800000, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v8, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v11, v7
 ; CHECK-NEXT:    v_mad_u64_u32 v[5:6], s[4:5], v9, v8, 0
@@ -2645,12 +2645,12 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v10, v1
 ; CGP-NEXT:    v_sub_i32_e32 v14, vcc, 0, v0
 ; CGP-NEXT:    v_subb_u32_e32 v15, vcc, 0, v1, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v10
+; CGP-NEXT:    v_madmk_f32 v4, v10, 0x4f800000, v4
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v10, 0x2f800000, v4
 ; CGP-NEXT:    v_trunc_f32_e32 v12, v10
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v12
+; CGP-NEXT:    v_madmk_f32 v4, v12, 0xcf800000, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v13, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v16, v12
 ; CGP-NEXT:    v_mad_u64_u32 v[10:11], s[4:5], v14, v13, 0
@@ -2819,12 +2819,12 @@ define <2 x i64> @v_srem_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v6, v3
 ; CGP-NEXT:    v_sub_i32_e32 v12, vcc, 0, v2
 ; CGP-NEXT:    v_subb_u32_e32 v13, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v6
+; CGP-NEXT:    v_madmk_f32 v4, v6, 0x4f800000, v4
 ; CGP-NEXT:    v_rcp_iflag_f32_e32 v4, v4
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x5f7ffffc, v4
 ; CGP-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v4
 ; CGP-NEXT:    v_trunc_f32_e32 v6, v6
-; CGP-NEXT:    v_mac_f32_e32 v4, 0xcf800000, v6
+; CGP-NEXT:    v_madmk_f32 v4, v6, 0xcf800000, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v11, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; CGP-NEXT:    v_mad_u64_u32 v[8:9], s[4:5], v12, v11, 0
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
index 3add708d1a6394d..342d04141ced45e 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/udiv.i64.ll
@@ -28,12 +28,12 @@ define i64 @v_udiv_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v3
 ; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
 ; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v3, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v6, 0x4f800000, v0
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v6
+; CHECK-NEXT:    v_madmk_f32 v0, v0, 0x4f800000, v6
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v6, 0x2f800000, v0
 ; CHECK-NEXT:    v_trunc_f32_e32 v6, v6
-; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v6
+; CHECK-NEXT:    v_madmk_f32 v0, v6, 0xcf800000, v0
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v6, v6
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v6
@@ -205,7 +205,7 @@ define amdgpu_ps i64 @s_udiv_i64(i64 inreg %num, i64 inreg %den) {
 ; CHECK-NEXT:    v_mul_f32_e32 v1, 0x5f7ffffc, v1
 ; CHECK-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v1
 ; CHECK-NEXT:    v_trunc_f32_e32 v4, v4
-; CHECK-NEXT:    v_mac_f32_e32 v1, 0xcf800000, v4
+; CHECK-NEXT:    v_madmk_f32 v1, v4, 0xcf800000, v1
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v1, v1
 ; CHECK-NEXT:    v_mul_lo_u32 v5, s4, v4
@@ -636,12 +636,12 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v5
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v4
 ; CGP-NEXT:    v_subb_u32_e32 v3, vcc, 0, v5, vcc
-; CGP-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v0
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v2
+; CGP-NEXT:    v_madmk_f32 v0, v0, 0x4f800000, v2
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; CGP-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
 ; CGP-NEXT:    v_trunc_f32_e32 v2, v2
-; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CGP-NEXT:    v_madmk_f32 v0, v2, 0xcf800000, v0
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; CGP-NEXT:    v_mul_lo_u32 v12, v1, v2
@@ -803,12 +803,12 @@ define <2 x i64> @v_udiv_v2i64(<2 x i64> %num, <2 x i64> %den) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v7
 ; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v6
 ; CGP-NEXT:    v_subb_u32_e32 v5, vcc, 0, v7, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v2
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
+; CGP-NEXT:    v_madmk_f32 v2, v2, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v2
 ; CGP-NEXT:    v_trunc_f32_e32 v4, v4
-; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CGP-NEXT:    v_madmk_f32 v2, v4, 0xcf800000, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; CGP-NEXT:    v_mul_lo_u32 v10, v3, v4
@@ -1091,12 +1091,12 @@ define i64 @v_udiv_i64_pow2_shl_denom(i64 %x, i64 %y) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v6
 ; CHECK-NEXT:    v_sub_i32_e32 v1, vcc, 0, v5
 ; CHECK-NEXT:    v_subb_u32_e32 v7, vcc, 0, v6, vcc
-; CHECK-NEXT:    v_mac_f32_e32 v2, 0x4f800000, v0
-; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v2
+; CHECK-NEXT:    v_madmk_f32 v0, v0, 0x4f800000, v2
+; CHECK-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; CHECK-NEXT:    v_mul_f32_e32 v2, 0x2f800000, v0
 ; CHECK-NEXT:    v_trunc_f32_e32 v2, v2
-; CHECK-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v2
+; CHECK-NEXT:    v_madmk_f32 v0, v2, 0xcf800000, v0
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; CHECK-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; CHECK-NEXT:    v_mul_lo_u32 v8, v1, v2
@@ -1526,12 +1526,12 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v0, v3
 ; CGP-NEXT:    v_sub_i32_e32 v1, vcc, 0, v2
 ; CGP-NEXT:    v_subb_u32_e32 v12, vcc, 0, v3, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v0
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v4
+; CGP-NEXT:    v_madmk_f32 v0, v0, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v0, v0
 ; CGP-NEXT:    v_mul_f32_e32 v0, 0x5f7ffffc, v0
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v0
 ; CGP-NEXT:    v_trunc_f32_e32 v4, v4
-; CGP-NEXT:    v_mac_f32_e32 v0, 0xcf800000, v4
+; CGP-NEXT:    v_madmk_f32 v0, v4, 0xcf800000, v0
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v0, v0
 ; CGP-NEXT:    v_mul_lo_u32 v13, v1, v4
@@ -1695,12 +1695,12 @@ define <2 x i64> @v_udiv_v2i64_pow2_shl_denom(<2 x i64> %x, <2 x i64> %y) {
 ; CGP-NEXT:    v_cvt_f32_u32_e32 v2, v10
 ; CGP-NEXT:    v_sub_i32_e32 v3, vcc, 0, v9
 ; CGP-NEXT:    v_subb_u32_e32 v6, vcc, 0, v10, vcc
-; CGP-NEXT:    v_mac_f32_e32 v4, 0x4f800000, v2
-; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v4
+; CGP-NEXT:    v_madmk_f32 v2, v2, 0x4f800000, v4
+; CGP-NEXT:    v_rcp_iflag_f32_e32 v2, v2
 ; CGP-NEXT:    v_mul_f32_e32 v2, 0x5f7ffffc, v2
 ; CGP-NEXT:    v_mul_f32_e32 v4, 0x2f800000, v2
 ; CGP-NEXT:    v_trunc_f32_e32 v4, v4
-; CGP-NEXT:    v_mac_f32_e32 v2, 0xcf800000, v4
+; CGP-NEXT:    v_madmk_f32 v2, v4, 0xcf800000, v2
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v4, v4
 ; CGP-NEXT:    v_cvt_u32_f32_e32 v2, v2
 ; CGP-NEXT:    v_mul_lo_u32 v8, v3, v4
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
index 12df4b7c7fc33d7..1d6fc87e7989efd 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/urem.i64.ll
@@ -28,12 +28,12 @@ define i64 @v_urem_i64(i64 %num, i64 %den) {
 ; CHECK-NEXT:    v_cvt_f32_u32_e32 v0, v3
 ; CHECK-NEXT:...
[truncated]

``````````

</details>


https://github.com/llvm/llvm-project/pull/72128


More information about the llvm-commits mailing list