[llvm] [GlobalISel] Fold fmul x, 1.0 -> x (PR #65379)

Tue Sep 5 10:08:40 PDT 2023

https://github.com/aemerson updated https://github.com/llvm/llvm-project/pull/65379:

>From cde375d3b7d4ff12068e96015b9a6268f28fd141 Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Fri, 1 Sep 2023 10:47:42 -0700
Subject: [PATCH 1/2] [GlobalISel] Fold fmul x, 1.0 -> x

---
 .../llvm/CodeGen/GlobalISel/CombinerHelper.h  |  6 +-
 .../include/llvm/Target/GlobalISel/Combine.td | 11 ++-
 .../lib/CodeGen/GlobalISel/CombinerHelper.cpp | 10 +++
 .../AArch64/GlobalISel/combine-mul.mir        | 37 ++++++++
 .../CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll     | 22 +----
 .../CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll     |  6 --
 .../CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll     | 48 ++++------
 llvm/test/CodeGen/AMDGPU/llvm.log.ll          | 36 +++-----
 llvm/test/CodeGen/AMDGPU/llvm.log10.ll        | 36 +++-----
 llvm/test/CodeGen/AMDGPU/llvm.log2.ll         |  6 +-
 llvm/test/CodeGen/AMDGPU/rsq.f64.ll           | 90 ++++++++-----------
 11 files changed, 149 insertions(+), 159 deletions(-)

diff --git a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
index 4f87590b9cbf97..75799e5eddcc67 100644
--- a/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
+++ b/llvm/include/llvm/CodeGen/GlobalISel/CombinerHelper.h
@@ -453,10 +453,14 @@ class CombinerHelper {
   /// equivalent instructions.
   bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
 
-  /// Return true if \p MOP is defined by a G_CONSTANT with a value equal to
+  /// Return true if \p MOP is defined by a G_CONSTANT or splat with a value equal to
   /// \p C.
   bool matchConstantOp(const MachineOperand &MOP, int64_t C);
 
+  /// Return true if \p MOP is defined by a G_FCONSTANT or splat with a value exactly
+  /// equal to \p C.
+  bool matchConstantFPOp(const MachineOperand &MOP, double C);
+
   /// Optimize (cond ? x : x) -> x
   bool matchSelectSameVal(MachineInstr &MI);
 
diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 774e8b15e97c01..ed66b6d5e78942 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -429,12 +429,21 @@ def right_identity_zero: GICombineRule<
 >;
 
 // Fold x op 1 -> x
-def right_identity_one: GICombineRule<
+def right_identity_one_int: GICombineRule<
   (defs root:$dst),
   (match (G_MUL $dst, $x, 1)),
   (apply (GIReplaceReg $dst, $x))
 >;
 
+def right_identity_one_fp: GICombineRule<
+  (defs root:$dst),
+  (match (G_FMUL $dst, $x, $y):$root,
+    [{ return Helper.matchConstantFPOp(${y}, 1.0); }]),
+  (apply (COPY $dst, $x))
+>;
+
+def right_identity_one : GICombineGroup<[right_identity_one_int, right_identity_one_fp]>;
+
 // Fold (x op x) - > x
 def binop_same_val_frags : GICombinePatFrag<
   (outs root:$dst), (ins $x),
diff --git a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
index e7862b7a26211e..17f030c473f80a 100644
--- a/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CombinerHelper.cpp
@@ -2563,6 +2563,16 @@ bool CombinerHelper::matchConstantOp(const MachineOperand &MOP, int64_t C) {
          MaybeCst->getSExtValue() == C;
 }
 
+bool CombinerHelper::matchConstantFPOp(const MachineOperand &MOP, double C) {
+  if (!MOP.isReg())
+    return false;
+  std::optional<FPValueAndVReg> MaybeCst;
+  if (!mi_match(MOP.getReg(), MRI, m_GFCstOrSplat(MaybeCst)))
+    return false;
+
+  return MaybeCst->Value.isExactlyValue(C);
+}
+
 void CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
                                                      unsigned OpIdx) {
   assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/combine-mul.mir b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mul.mir
index 9812ea7708df2b..30a13f8f8f393d 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/combine-mul.mir
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/combine-mul.mir
@@ -64,6 +64,43 @@ body:             |
     $x0 = COPY %2(s64)
 ...
 ---
+name:            fmul_by_one
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $d0
+    ; CHECK-LABEL: name: fmul_by_one
+    ; CHECK: liveins: $d0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s64) = COPY $d0
+    ; CHECK-NEXT: $d0 = COPY [[COPY]](s64)
+    %0:_(s64) = COPY $d0
+    %1:_(s64) = G_FCONSTANT double 1.000000e+00
+    %2:_(s64) = G_FMUL %0, %1(s64)
+    $d0 = COPY %2(s64)
+...
+---
+name:            fmul_vector_by_one
+alignment:       4
+tracksRegLiveness: true
+frameInfo:
+  maxAlignment:    1
+machineFunctionInfo: {}
+body:             |
+  bb.0:
+    liveins: $q0
+    ; CHECK-LABEL: name: fmul_vector_by_one
+    ; CHECK: liveins: $q0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(<4 x s32>) = COPY $q0
+    ; CHECK-NEXT: $q0 = COPY [[COPY]](<4 x s32>)
+    %0:_(<4 x s32>) = COPY $q0
+    %1:_(s32) = G_FCONSTANT float 1.0
+    %2:_(<4 x s32>) = G_BUILD_VECTOR %1(s32), %1(s32), %1(s32), %1(s32)
+    %3:_(<4 x s32>) = G_FMUL %0, %2(<4 x s32>)
+    $q0 = COPY %3(<4 x s32>)
+...
+---
 name:            mul_vector_by_one
 alignment:       4
 tracksRegLiveness: true
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
index 33a2c9b2ce8bd3..29233c77650b91 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f16.ll
@@ -1725,10 +1725,7 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_rcp_f16_e32 v1, v0
-; GFX8-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x3c00
-; GFX8-NEXT:    v_mul_f16_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1737,8 +1734,6 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_rcp_f16_e32 v1, v0
 ; GFX9-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9-NEXT:    v_mul_f16_e32 v1, 1.0, v1
-; GFX9-NEXT:    v_mul_f16_e32 v0, 1.0, v0
 ; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1747,8 +1742,6 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_rcp_f16_e32 v1, v0
 ; GFX10-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-NEXT:    v_mul_f16_e32 v1, 1.0, v1
-; GFX10-NEXT:    v_mul_f16_e32 v0, 1.0, v0
 ; GFX10-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1759,8 +1752,6 @@ define <2 x half> @v_rcp_v2f16_arcp(<2 x half> %x) {
 ; GFX11-NEXT:    v_rcp_f16_e32 v0, v0
 ; GFX11-NEXT:    v_rcp_f16_e32 v1, v1
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_mul_f16_e32 v0, 1.0, v0
-; GFX11-NEXT:    v_mul_f16_e32 v1, 1.0, v1
 ; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %fdiv = fdiv arcp <2 x half> <half 1.0, half 1.0>, %x
@@ -1786,10 +1777,7 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
 ; GFX8:       ; %bb.0:
 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX8-NEXT:    v_rcp_f16_e32 v1, v0
-; GFX8-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX8-NEXT:    v_mov_b32_e32 v2, 0x3c00
-; GFX8-NEXT:    v_mul_f16_e32 v1, 1.0, v1
-; GFX8-NEXT:    v_mul_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
+; GFX8-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:WORD_1
 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1798,8 +1786,6 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX9-NEXT:    v_rcp_f16_e32 v1, v0
 ; GFX9-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX9-NEXT:    v_mul_f16_e32 v1, 1.0, v1
-; GFX9-NEXT:    v_mul_f16_e32 v0, 1.0, v0
 ; GFX9-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1808,8 +1794,6 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_rcp_f16_e32 v1, v0
 ; GFX10-NEXT:    v_rcp_f16_sdwa v0, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1
-; GFX10-NEXT:    v_mul_f16_e32 v1, 1.0, v1
-; GFX10-NEXT:    v_mul_f16_e32 v0, 1.0, v0
 ; GFX10-NEXT:    v_pack_b32_f16 v0, v1, v0
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
@@ -1820,8 +1804,6 @@ define <2 x half> @v_rcp_v2f16_arcp_afn(<2 x half> %x) {
 ; GFX11-NEXT:    v_rcp_f16_e32 v0, v0
 ; GFX11-NEXT:    v_rcp_f16_e32 v1, v1
 ; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_mul_f16_e32 v0, 1.0, v0
-; GFX11-NEXT:    v_mul_f16_e32 v1, 1.0, v1
 ; GFX11-NEXT:    v_pack_b32_f16 v0, v0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %fdiv = fdiv arcp afn <2 x half> <half 1.0, half 1.0>, %x
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
index 59ad39b2362ed8..25c7f5c8c531db 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f32.ll
@@ -1971,8 +1971,6 @@ define <2 x float> @v_rcp_v2f32_arcp_afn(<2 x float> %x) {
 ; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GCN-NEXT:    v_rcp_f32_e32 v0, v0
 ; GCN-NEXT:    v_rcp_f32_e32 v1, v1
-; GCN-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GCN-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_rcp_v2f32_arcp_afn:
@@ -1980,8 +1978,6 @@ define <2 x float> @v_rcp_v2f32_arcp_afn(<2 x float> %x) {
 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX10-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX10-NEXT:    v_rcp_f32_e32 v1, v1
-; GFX10-NEXT:    v_mul_f32_e32 v0, 1.0, v0
-; GFX10-NEXT:    v_mul_f32_e32 v1, 1.0, v1
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_rcp_v2f32_arcp_afn:
@@ -1989,8 +1985,6 @@ define <2 x float> @v_rcp_v2f32_arcp_afn(<2 x float> %x) {
 ; GFX11-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX11-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX11-NEXT:    v_rcp_f32_e32 v1, v1
-; GFX11-NEXT:    s_waitcnt_depctr 0xfff
-; GFX11-NEXT:    v_dual_mul_f32 v0, 1.0, v0 :: v_dual_mul_f32 v1, 1.0, v1
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %fdiv = fdiv arcp afn <2 x float> <float 1.0, float 1.0>, %x
   ret <2 x float> %fdiv
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
index a7eab3105a5252..78155453d908af 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/fdiv.f64.ll
@@ -440,9 +440,8 @@ define double @v_rcp_f64_arcp_afn(double %x) {
 ; GCN-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; GCN-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; GCN-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GCN-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_rcp_f64_arcp_afn:
@@ -453,9 +452,8 @@ define double @v_rcp_f64_arcp_afn(double %x) {
 ; GFX10-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; GFX10-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; GFX10-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GFX10-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_rcp_f64_arcp_afn:
@@ -469,10 +467,9 @@ define double @v_rcp_f64_arcp_afn(double %x) {
 ; GFX11-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
 ; GFX11-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; GFX11-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; GFX11-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %fdiv = fdiv arcp afn double 1.0, %x
   ret double %fdiv
@@ -1436,12 +1433,10 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
 ; GCN-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
 ; GCN-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
 ; GCN-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; GCN-NEXT:    v_mul_f64 v[8:9], v[4:5], 1.0
-; GCN-NEXT:    v_mul_f64 v[10:11], v[6:7], 1.0
-; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
-; GCN-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
-; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
-; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
+; GCN-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; GCN-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; GCN-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; GCN-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
 ; GCN-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX10-LABEL: v_rcp_v2f64_arcp_afn:
@@ -1457,12 +1452,10 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
 ; GFX10-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
 ; GFX10-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
 ; GFX10-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; GFX10-NEXT:    v_mul_f64 v[8:9], v[4:5], 1.0
-; GFX10-NEXT:    v_mul_f64 v[10:11], v[6:7], 1.0
-; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
-; GFX10-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
-; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
-; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
+; GFX10-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; GFX10-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; GFX10-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; GFX10-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX11-LABEL: v_rcp_v2f64_arcp_afn:
@@ -1483,14 +1476,11 @@ define <2 x double> @v_rcp_v2f64_arcp_afn(<2 x double> %x) {
 ; GFX11-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
 ; GFX11-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_mul_f64 v[8:9], v[4:5], 1.0
-; GFX11-NEXT:    v_mul_f64 v[10:11], v[6:7], 1.0
-; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
-; GFX11-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
+; GFX11-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; GFX11-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
 ; GFX11-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
-; GFX11-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
+; GFX11-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; GFX11-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
 ; GFX11-NEXT:    s_setpc_b64 s[30:31]
   %fdiv = fdiv arcp afn <2 x double> <double 1.0, double 1.0>, %x
   ret <2 x double> %fdiv
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log.ll b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
index 23aab3fa863d9d..3868addbfbff38 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log.ll
@@ -5614,9 +5614,8 @@ define float @v_log_f32_undef() {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v1, s4, v1
-; SI-GISEL-NEXT:    v_mul_f32_e64 v2, s4, 1.0
 ; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; SI-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    s_mov_b32 s4, 0x3f317217
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3377d1cf
@@ -5656,15 +5655,14 @@ define float @v_log_f32_undef() {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v1, s4, v1
-; VI-GISEL-NEXT:    v_mul_f32_e64 v2, s4, 1.0
 ; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; VI-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; VI-GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
 ; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT:    v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317000, v1
@@ -5698,9 +5696,8 @@ define float @v_log_f32_undef() {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, s4, v1
-; GFX900-GISEL-NEXT:    v_mul_f32_e64 v2, s4, 1.0
 ; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    s_mov_b32 s4, 0x3f317217
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3377d1cf
@@ -5735,10 +5732,9 @@ define float @v_log_f32_undef() {
 ; GFX1100-GISEL:       ; %bb.0:
 ; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v1, s0, 1.0
 ; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
 ; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
@@ -5790,9 +5786,8 @@ define float @v_log_f32_0() {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; SI-GISEL-NEXT:    v_mul_f32_e64 v2, 0, 1.0
 ; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    s_mov_b32 s4, 0x3f317217
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3377d1cf
@@ -5833,15 +5828,14 @@ define float @v_log_f32_0() {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; VI-GISEL-NEXT:    v_mul_f32_e64 v2, 0, 1.0
 ; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; VI-GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
 ; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v2
-; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v1
-; VI-GISEL-NEXT:    v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x3805fdf4, v1
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x3805fdf4, v2
+; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 0x3f317000, v2
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317000, v1
@@ -5876,9 +5870,8 @@ define float @v_log_f32_0() {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; GFX900-GISEL-NEXT:    v_mul_f32_e64 v2, 0, 1.0
 ; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    s_mov_b32 s4, 0x3f317217
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3377d1cf
@@ -5915,10 +5908,9 @@ define float @v_log_f32_0() {
 ; GFX1100-GISEL:       ; %bb.0:
 ; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v0, 0x4f800000, 0
-; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v1, 0, 1.0
 ; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
 ; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3f317217, v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
index 9df2cec4441336..9207c09669ff75 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log10.ll
@@ -5614,9 +5614,8 @@ define float @v_log10_f32_undef() {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v1, s4, v1
-; SI-GISEL-NEXT:    v_mul_f32_e64 v2, s4, 1.0
 ; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; SI-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    s_mov_b32 s4, 0x3e9a209a
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3284fbcf
@@ -5656,15 +5655,14 @@ define float @v_log10_f32_undef() {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v1, s4, v1
-; VI-GISEL-NEXT:    v_mul_f32_e64 v2, s4, 1.0
 ; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; VI-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; VI-GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
 ; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT:    v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 0x3e9a2000, v2
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -5698,9 +5696,8 @@ define float @v_log10_f32_undef() {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, s4, v1
-; GFX900-GISEL-NEXT:    v_mul_f32_e64 v2, s4, 1.0
 ; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    s_mov_b32 s4, 0x3e9a209a
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3284fbcf
@@ -5735,10 +5732,9 @@ define float @v_log10_f32_undef() {
 ; GFX1100-GISEL:       ; %bb.0:
 ; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v1, s0, 1.0
 ; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
 ; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
@@ -5790,9 +5786,8 @@ define float @v_log10_f32_0() {
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; SI-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; SI-GISEL-NEXT:    v_mul_f32_e64 v2, 0, 1.0
 ; SI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; SI-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; SI-GISEL-NEXT:    s_mov_b32 s4, 0x3e9a209a
 ; SI-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3284fbcf
@@ -5833,15 +5828,14 @@ define float @v_log10_f32_0() {
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; VI-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; VI-GISEL-NEXT:    v_mul_f32_e64 v2, 0, 1.0
 ; VI-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; VI-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; VI-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; VI-GISEL-NEXT:    v_and_b32_e32 v1, 0xfffff000, v0
 ; VI-GISEL-NEXT:    v_sub_f32_e32 v2, v0, v1
-; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v2
-; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v1
-; VI-GISEL-NEXT:    v_add_f32_e32 v3, v4, v3
+; VI-GISEL-NEXT:    v_mul_f32_e32 v3, 0x369a84fb, v1
+; VI-GISEL-NEXT:    v_mul_f32_e32 v4, 0x369a84fb, v2
+; VI-GISEL-NEXT:    v_add_f32_e32 v3, v3, v4
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v2, 0x3e9a2000, v2
 ; VI-GISEL-NEXT:    v_add_f32_e32 v2, v2, v3
 ; VI-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3e9a2000, v1
@@ -5876,9 +5870,8 @@ define float @v_log10_f32_0() {
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; GFX900-GISEL-NEXT:    v_mul_f32_e32 v1, 0, v1
-; GFX900-GISEL-NEXT:    v_mul_f32_e64 v2, 0, 1.0
 ; GFX900-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, 0, v0
-; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX900-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v1, vcc
 ; GFX900-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX900-GISEL-NEXT:    s_mov_b32 s4, 0x3e9a209a
 ; GFX900-GISEL-NEXT:    v_mov_b32_e32 v3, 0x3284fbcf
@@ -5915,10 +5908,9 @@ define float @v_log10_f32_0() {
 ; GFX1100-GISEL:       ; %bb.0:
 ; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v0, 0x4f800000, 0
-; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v1, 0, 1.0
 ; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x800000, 0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, 0, v0, vcc_lo
 ; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX1100-GISEL-NEXT:    s_waitcnt_depctr 0xfff
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e32 v1, 0x3e9a209a, v0
diff --git a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
index 6485f13d0c89e9..af12c10fec5d6f 100644
--- a/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
+++ b/llvm/test/CodeGen/AMDGPU/llvm.log2.ll
@@ -2566,9 +2566,8 @@ define float @v_log2_f32_undef() {
 ; GFX689-GISEL-NEXT:    v_mov_b32_e32 v0, 0x800000
 ; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x4f800000
 ; GFX689-GISEL-NEXT:    v_mul_f32_e32 v1, s4, v1
-; GFX689-GISEL-NEXT:    v_mul_f32_e64 v2, s4, 1.0
 ; GFX689-GISEL-NEXT:    v_cmp_lt_f32_e32 vcc, s4, v0
-; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; GFX689-GISEL-NEXT:    v_log_f32_e32 v0, v0
 ; GFX689-GISEL-NEXT:    v_mov_b32_e32 v1, 0x42000000
 ; GFX689-GISEL-NEXT:    v_cndmask_b32_e32 v1, 0, v1, vcc
@@ -2585,10 +2584,9 @@ define float @v_log2_f32_undef() {
 ; GFX1100-GISEL:       ; %bb.0:
 ; GFX1100-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 ; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v0, 0x4f800000, s0
-; GFX1100-GISEL-NEXT:    v_mul_f32_e64 v1, s0, 1.0
 ; GFX1100-GISEL-NEXT:    v_cmp_gt_f32_e64 vcc_lo, 0x800000, s0
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2) | instskip(NEXT) | instid1(VALU_DEP_2)
-; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
+; GFX1100-GISEL-NEXT:    v_cndmask_b32_e32 v0, s0, v0, vcc_lo
 ; GFX1100-GISEL-NEXT:    v_cndmask_b32_e64 v1, 0, 0x42000000, vcc_lo
 ; GFX1100-GISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
 ; GFX1100-GISEL-NEXT:    v_log_f32_e32 v0, v0
diff --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 046df70e95240d..0654d555766456 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -3132,9 +3132,8 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_rsq_f64__afn_fdiv:
@@ -3203,9 +3202,8 @@ define double @v_rsq_f64__afn_fdiv(double %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract double @llvm.sqrt.f64(double %x)
   %rsq = fdiv contract afn double 1.0, %sqrt
@@ -3279,9 +3277,8 @@ define double @v_rsq_f64__afn(double %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_rsq_f64__afn:
@@ -3350,9 +3347,8 @@ define double @v_rsq_f64__afn(double %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
   %rsq = fdiv contract afn double 1.0, %sqrt
@@ -3575,9 +3571,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_rsq_f64__afn_ninf:
@@ -3646,9 +3641,8 @@ define double @v_rsq_f64__afn_ninf(double %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract afn ninf double @llvm.sqrt.f64(double %x)
   %rsq = fdiv contract afn ninf double 1.0, %sqrt
@@ -3722,9 +3716,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_rsq_f64__afn_nnan:
@@ -3793,9 +3786,8 @@ define double @v_rsq_f64__afn_nnan(double %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract afn nnan double @llvm.sqrt.f64(double %x)
   %rsq = fdiv contract afn nnan double 1.0, %sqrt
@@ -3869,9 +3861,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_rsq_f64__afn_nnan_ninf:
@@ -3940,9 +3931,8 @@ define double @v_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
   %rsq = fdiv contract afn nnan ninf double 1.0, %sqrt
@@ -4383,12 +4373,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
 ; SI-GISEL-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; SI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
-; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_rsq_v2f64__afn_nnan_ninf:
@@ -4506,12 +4494,10 @@ define <2 x double> @v_rsq_v2f64__afn_nnan_ninf(<2 x double> %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[10:11], -v[2:3], v[6:7], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], v[8:9], v[4:5], v[4:5]
 ; VI-GISEL-NEXT:    v_fma_f64 v[6:7], v[10:11], v[6:7], v[6:7]
-; VI-GISEL-NEXT:    v_mul_f64 v[8:9], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_mul_f64 v[10:11], v[6:7], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[8:9], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[10:11], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[8:9]
-; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[10:11]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], -v[2:3], v[6:7], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[4:5], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[6:7]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract afn nnan ninf <2 x double> @llvm.sqrt.v2f64(<2 x double> %x)
   %rsq = fdiv contract afn nnan ninf <2 x double> <double 1.0, double 1.0>, %sqrt
@@ -4587,9 +4573,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
 ; SI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
 ; SI-GISEL-NEXT:    ; return to shader part epilog
@@ -4662,9 +4647,8 @@ define amdgpu_ps <2 x i32> @s_rsq_f64_unsafe(double inreg %x) #0 {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_readfirstlane_b32 s0, v0
 ; VI-GISEL-NEXT:    v_readfirstlane_b32 s1, v1
 ; VI-GISEL-NEXT:    ; return to shader part epilog
@@ -4747,9 +4731,8 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_rsq_f64_unsafe:
@@ -4818,9 +4801,8 @@ define double @v_rsq_f64_unsafe(double %x) #0 {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], 1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[2:3], 1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call double @llvm.sqrt.f64(double %x)
   %rsq = fdiv double 1.0, %sqrt

>From 930ec7271bf715d0f3e27887b864f55d17187cad Mon Sep 17 00:00:00 2001
From: Amara Emerson <amara at apple.com>
Date: Tue, 5 Sep 2023 10:08:17 -0700
Subject: [PATCH 2/2] Use the new GIReplaceReg builtin.

---
 llvm/include/llvm/Target/GlobalISel/Combine.td | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index ed66b6d5e78942..977d5cba532102 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -439,7 +439,7 @@ def right_identity_one_fp: GICombineRule<
   (defs root:$dst),
   (match (G_FMUL $dst, $x, $y):$root,
     [{ return Helper.matchConstantFPOp(${y}, 1.0); }]),
-  (apply (COPY $dst, $x))
+  (apply (GIReplaceReg $dst, $x))
 >;
 
 def right_identity_one : GICombineGroup<[right_identity_one_int, right_identity_one_fp]>;