[llvm] 5db49f7 - [GlobalISel] replace right identity X * -1.0 with fneg(x) (#80526)

via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 21 01:42:03 PST 2024


Author: Nick Anderson
Date: 2024-02-21T09:41:59Z
New Revision: 5db49f726619b943d8201ef3867393923836cb2f

URL: https://github.com/llvm/llvm-project/commit/5db49f726619b943d8201ef3867393923836cb2f
DIFF: https://github.com/llvm/llvm-project/commit/5db49f726619b943d8201ef3867393923836cb2f.diff

LOG: [GlobalISel] replace right identity X * -1.0 with fneg(x) (#80526)

Follow-up patch to #78673.

@Pierre-vh @jayfoad @arsenm Could you review when you have a chance?

Added: 
    llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir

Modified: 
    llvm/include/llvm/Target/GlobalISel/Combine.td
    llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
    llvm/test/CodeGen/AMDGPU/rsq.f64.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Target/GlobalISel/Combine.td b/llvm/include/llvm/Target/GlobalISel/Combine.td
index 7eadb718f16415..17757ca3e41111 100644
--- a/llvm/include/llvm/Target/GlobalISel/Combine.td
+++ b/llvm/include/llvm/Target/GlobalISel/Combine.td
@@ -493,6 +493,13 @@ def right_identity_one_fp: GICombineRule<
   (apply (GIReplaceReg $dst, $x))
 >;
 
+def right_identity_neg_one_fp: GICombineRule<
+  (defs root:$dst),
+  (match (G_FMUL $dst, $x, $y):$root,
+    [{ return Helper.matchConstantFPOp(${y}, -1.0); }]),
+  (apply (G_FNEG $dst, $x))
+>;
+
 def right_identity_one : GICombineGroup<[right_identity_one_int, right_identity_one_fp]>;
 
 // Fold (x op x) - > x
@@ -1283,7 +1290,8 @@ def identity_combines : GICombineGroup<[select_same_val, right_identity_zero,
                                         trunc_buildvector_fold,
                                         trunc_lshr_buildvector_fold,
                                         bitcast_bitcast_fold, fptrunc_fpext_fold,
-                                        right_identity_neg_zero_fp]>;
+                                        right_identity_neg_zero_fp,
+                                        right_identity_neg_one_fp]>;
 
 def const_combines : GICombineGroup<[constant_fold_fp_ops, const_ptradd_to_i2p,
                                      overlapping_and, mulo_by_2, mulo_by_0,

diff  --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir
new file mode 100644
index 00000000000000..8ec2778992e23c
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/combine-fpneg-one-fneg.mir
@@ -0,0 +1,216 @@
+# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 4
+# RUN: llc -march=amdgcn -mcpu=gfx1010 -run-pass=amdgpu-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK
+
+---
+name:            test_neg_one_f16_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_neg_one_f16_sgpr
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: %d:_(s16) = G_FNEG %x
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %d(s16)
+    ; CHECK-NEXT: $sgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $sgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %y:_(s16) = G_FCONSTANT half -1.0
+    %d:_(s16) = G_FMUL %x, %y
+    %ext:_(s32) = G_ANYEXT %d:_(s16)
+    $sgpr0 = COPY %ext
+
+...
+
+---
+name:            test_neg_one_f32_sgpr
+body:             |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_neg_one_f32_sgpr
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: $sgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = G_FCONSTANT float -1.0
+    %2:_(s32) = G_FMUL %0, %1
+    $sgpr0 = COPY %2(s32)
+
+...
+
+---
+name:            test_neg_one_f64_sgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_neg_one_f64_sgpr
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK-NEXT: %d:_(s64) = G_FNEG %x
+    ; CHECK-NEXT: %ext:_(s32) = G_TRUNC %d(s64)
+    ; CHECK-NEXT: $sgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $sgpr0
+    %x:_(s64) = G_ANYEXT %0:_(s32)
+    %y:_(s64) = G_FCONSTANT double -1.0
+    %d:_(s64) = G_FMUL %x, %y
+    %ext:_(s32) = G_TRUNC %d:_(s64)
+    $sgpr0 = COPY %ext
+
+...
+
+---
+name:            test_neg_ten_f32_sgpr
+body:             |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_neg_ten_f32_sgpr
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+01
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
+    ; CHECK-NEXT: $sgpr0 = COPY [[FMUL]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = G_FCONSTANT float -10.0
+    %2:_(s32) = G_FMUL %0, %1
+    $sgpr0 = COPY %2(s32)
+
+...
+
+---
+name:            test_neg_fract_f32_sgpr
+body:             |
+  bb.0:
+    liveins: $sgpr0
+
+    ; CHECK-LABEL: name: test_neg_fract_f32_sgpr
+    ; CHECK: liveins: $sgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $sgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -5.000000e-01
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
+    ; CHECK-NEXT: $sgpr0 = COPY [[FMUL]](s32)
+    %0:_(s32) = COPY $sgpr0
+    %1:_(s32) = G_FCONSTANT float -0.5
+    %2:_(s32) = G_FMUL %0, %1
+    $sgpr0 = COPY %2(s32)
+
+...
+
+---
+name:            test_neg_one_f16_vgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_neg_one_f16_vgpr
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %x:_(s16) = G_TRUNC [[COPY]](s32)
+    ; CHECK-NEXT: %d:_(s16) = G_FNEG %x
+    ; CHECK-NEXT: %ext:_(s32) = G_ANYEXT %d(s16)
+    ; CHECK-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s16) = G_TRUNC %0:_(s32)
+    %y:_(s16) = G_FCONSTANT half -1.0
+    %d:_(s16) = G_FMUL %x, %y
+    %ext:_(s32) = G_ANYEXT %d:_(s16)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            test_neg_one_f32_vgpr
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_neg_one_f32_vgpr
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[FNEG:%[0-9]+]]:_(s32) = G_FNEG [[COPY]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FNEG]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FCONSTANT float -1.0
+    %2:_(s32) = G_FMUL %0, %1
+    $vgpr0 = COPY %2(s32)
+
+...
+
+---
+name:            test_neg_one_f64_vgpr
+tracksRegLiveness: true
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_neg_one_f64_vgpr
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: %x:_(s64) = G_ANYEXT [[COPY]](s32)
+    ; CHECK-NEXT: %d:_(s64) = G_FNEG %x
+    ; CHECK-NEXT: %ext:_(s32) = G_TRUNC %d(s64)
+    ; CHECK-NEXT: $vgpr0 = COPY %ext(s32)
+    %0:_(s32) = COPY $vgpr0
+    %x:_(s64) = G_ANYEXT %0:_(s32)
+    %y:_(s64) = G_FCONSTANT double -1.0
+    %d:_(s64) = G_FMUL %x, %y
+    %ext:_(s32) = G_TRUNC %d:_(s64)
+    $vgpr0 = COPY %ext
+
+...
+
+---
+name:            test_neg_ten_f32_vgpr
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_neg_ten_f32_vgpr
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -1.000000e+01
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FCONSTANT float -10.0
+    %2:_(s32) = G_FMUL %0, %1
+    $vgpr0 = COPY %2(s32)
+
+...
+
+---
+name:            test_neg_fract_f32_vgpr
+body:             |
+  bb.0:
+    liveins: $vgpr0
+
+    ; CHECK-LABEL: name: test_neg_fract_f32_vgpr
+    ; CHECK: liveins: $vgpr0
+    ; CHECK-NEXT: {{  $}}
+    ; CHECK-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0
+    ; CHECK-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float -5.000000e-01
+    ; CHECK-NEXT: [[FMUL:%[0-9]+]]:_(s32) = G_FMUL [[COPY]], [[C]]
+    ; CHECK-NEXT: $vgpr0 = COPY [[FMUL]](s32)
+    %0:_(s32) = COPY $vgpr0
+    %1:_(s32) = G_FCONSTANT float -0.5
+    %2:_(s32) = G_FMUL %0, %1
+    $vgpr0 = COPY %2(s32)
+
+...

diff  --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index 3e658c6f385329..711a5fff1a0634 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -218,41 +218,11 @@ define float @v_mul_neg2_f32(float %x) {
 }
 
 define float @v_mul_neg1_f32(float %x) {
-; GFX9-SDAG-LABEL: v_mul_neg1_f32:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_neg1_f32:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_neg1_f32:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_neg1_f32:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_neg1_f32:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: v_mul_neg1_f32:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mul_f32_e32 v0, -1.0, v0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mul_neg1_f32:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x80000000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %mul = fmul float %x, -1.0
   ret float %mul
 }
@@ -1356,41 +1326,11 @@ define double @v_mul_0_f64(double %x) {
 }
 
 define double @v_mul_neg1_f64(double %x) {
-; GFX9-SDAG-LABEL: v_mul_neg1_f64:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_neg1_f64:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], -1.0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_neg1_f64:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_neg1_f64:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], -1.0
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_neg1_f64:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: v_mul_neg1_f64:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mul_f64 v[0:1], v[0:1], -1.0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mul_neg1_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %mul = fmul double %x, -1.0
   ret double %mul
 }
@@ -2848,41 +2788,11 @@ define half @v_mul_neg2_f16(half %x) {
 }
 
 define half @v_mul_neg1_f16(half %x) {
-; GFX9-SDAG-LABEL: v_mul_neg1_f16:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_neg1_f16:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_mul_f16_e32 v0, -1.0, v0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_neg1_f16:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_neg1_f16:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_mul_f16_e32 v0, -1.0, v0
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_neg1_f16:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: v_mul_neg1_f16:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mul_f16_e32 v0, -1.0, v0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mul_neg1_f16:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_xor_b32_e32 v0, 0x8000, v0
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %mul = fmul half %x, -1.0
   ret half %mul
 }
@@ -7112,41 +7022,11 @@ define double @v_mul_fabs_neg2_f64(double %x) {
 }
 
 define double @v_mul_fabs_neg1_f64(double %x) {
-; GFX9-SDAG-LABEL: v_mul_fabs_neg1_f64:
-; GFX9-SDAG:       ; %bb.0:
-; GFX9-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT:    v_or_b32_e32 v1, 0x80000000, v1
-; GFX9-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_fabs_neg1_f64:
-; GFX9-GISEL:       ; %bb.0:
-; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT:    v_mul_f64 v[0:1], |v[0:1]|, -1.0
-; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-SDAG-LABEL: v_mul_fabs_neg1_f64:
-; GFX10-SDAG:       ; %bb.0:
-; GFX10-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT:    v_or_b32_e32 v1, 0x80000000, v1
-; GFX10-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX10-GISEL-LABEL: v_mul_fabs_neg1_f64:
-; GFX10-GISEL:       ; %bb.0:
-; GFX10-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT:    v_mul_f64 v[0:1], |v[0:1]|, -1.0
-; GFX10-GISEL-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-SDAG-LABEL: v_mul_fabs_neg1_f64:
-; GFX11-SDAG:       ; %bb.0:
-; GFX11-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT:    v_or_b32_e32 v1, 0x80000000, v1
-; GFX11-SDAG-NEXT:    s_setpc_b64 s[30:31]
-;
-; GFX11-GISEL-LABEL: v_mul_fabs_neg1_f64:
-; GFX11-GISEL:       ; %bb.0:
-; GFX11-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT:    v_mul_f64 v[0:1], |v[0:1]|, -1.0
-; GFX11-GISEL-NEXT:    s_setpc_b64 s[30:31]
+; GCN-LABEL: v_mul_fabs_neg1_f64:
+; GCN:       ; %bb.0:
+; GCN-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT:    v_or_b32_e32 v1, 0x80000000, v1
+; GCN-NEXT:    s_setpc_b64 s[30:31]
   %fabs.x = call double @llvm.fabs.f64(double %x)
   %mul = fmul double %fabs.x, -1.0
   ret double %mul

diff  --git a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
index 90175298a99ac2..bd6e1f54e636d8 100644
--- a/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
+++ b/llvm/test/CodeGen/AMDGPU/rsq.f64.ll
@@ -3431,9 +3431,8 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_neg_rsq_f64__afn:
@@ -3503,9 +3502,8 @@ define double @v_neg_rsq_f64__afn(double %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract afn double @llvm.sqrt.f64(double %x)
   %rsq = fdiv contract afn double -1.0, %sqrt
@@ -4015,9 +4013,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; SI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; SI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; SI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; SI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
 ; SI-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; VI-SDAG-LABEL: v_neg_rsq_f64__afn_nnan_ninf:
@@ -4087,9 +4084,8 @@ define double @v_neg_rsq_f64__afn_nnan_ninf(double %x) {
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
 ; VI-GISEL-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
 ; VI-GISEL-NEXT:    v_fma_f64 v[2:3], v[4:5], v[2:3], v[2:3]
-; VI-GISEL-NEXT:    v_mul_f64 v[4:5], v[2:3], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[4:5], -1.0
-; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], v[4:5]
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -1.0
+; VI-GISEL-NEXT:    v_fma_f64 v[0:1], v[0:1], v[2:3], -v[2:3]
 ; VI-GISEL-NEXT:    s_setpc_b64 s[30:31]
   %sqrt = call contract afn nnan ninf double @llvm.sqrt.f64(double %x)
   %rsq = fdiv contract afn nnan ninf double -1.0, %sqrt


        


More information about the llvm-commits mailing list