[llvm] ca34f1b - AMDGPU: Add baseline test for folding fsub into fneg modifiers

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Jul 20 15:29:40 PDT 2023


Author: Matt Arsenault
Date: 2023-07-20T18:29:35-04:00
New Revision: ca34f1bdcd38f778f00e4bedfebc5fde8b7302b3

URL: https://github.com/llvm/llvm-project/commit/ca34f1bdcd38f778f00e4bedfebc5fde8b7302b3
DIFF: https://github.com/llvm/llvm-project/commit/ca34f1bdcd38f778f00e4bedfebc5fde8b7302b3.diff

LOG: AMDGPU: Add baseline test for folding fsub into fneg modifiers

Added: 
    llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
new file mode 100644
index 00000000000000..561a73b54b3bd1
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/fsub-as-fneg-src-modifier.ll
@@ -0,0 +1,1323 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
+; RUN: llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,SDAG %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefixes=CHECK,GISEL %s
+
+; Test that an fsub from -0.0 is folded into an fneg source modifier on its
+; user in cases where the fsub could not be turned into an fneg in isolation.
+
+define float @no_fold_f32_fsub_into_fneg_modifier_ieee_pos1(float %v0, float %v1) #0 {
+; CHECK-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_pos1:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v0, 1.0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float 1.0, %v0 ; 1.0 - x: a genuine subtraction, not a negation
+  %mul = fmul float %sub, %v1 ; both selectors are expected to emit v_sub_f32 then a plain v_mul_f32 (no neg modifier)
+  ret float %mul
+}
+
+define float @no_fold_f32_fsub_into_fneg_modifier_daz_pos1(float %v0, float %v1) #1 {
+; CHECK-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_pos1:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v0, 1.0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float 1.0, %v0 ; 1.0 - x: a genuine subtraction, not a negation
+  %mul = fmul float %sub, %v1 ; both selectors are expected to emit v_sub_f32 then a plain v_mul_f32 (no neg modifier)
+  ret float %mul
+}
+
+define float @no_fold_f32_fsub_into_fneg_modifier_ieee_commuted(float %v0, float %v1) #0 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_commuted:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_add_f32_e32 v0, 0, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_ieee_commuted:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_subrev_f32_e32 v0, 0x80000000, v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float %v0, -0.0 ; x - (-0.0): -0.0 is the subtrahend, so x is not negated
+  %mul = fmul float %sub, %v1 ; expected: SDAG emits v_add_f32 with 0, GISEL emits v_subrev_f32 with 0x80000000; no neg modifier on v_mul_f32
+  ret float %mul
+}
+
+define float @fold_f32_fsub_into_fneg_modifier_ieee_pos0(float %v0, float %v1) #0 {
+; CHECK-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_pos0:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v0, 0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float 0.0, %v0 ; +0.0 - x: differs from -x when x = +0.0 (result is +0.0, not -0.0)
+  %mul = fmul float %sub, %v1 ; despite the fold_ name, the expected code keeps v_sub_f32 (no nsz flag on these ops)
+  ret float %mul
+}
+
+define float @fold_f32_fsub_into_fneg_modifier_daz_pos0(float %v0, float %v1) #1 {
+; CHECK-LABEL: fold_f32_fsub_into_fneg_modifier_daz_pos0:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v0, 0, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float 0.0, %v0 ; +0.0 - x: differs from -x when x = +0.0 (result is +0.0, not -0.0)
+  %mul = fmul float %sub, %v1 ; despite the fold_ name, the expected code keeps v_sub_f32 (no nsz flag on these ops)
+  ret float %mul
+}
+
+define float @no_fold_f32_fsub_into_fneg_modifier_daz_commuted(float %v0, float %v1) #1 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_add_f32_e32 v0, 0, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_daz_commuted:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_subrev_f32_e32 v0, 0x80000000, v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float %v0, -0.0 ; x - (-0.0): -0.0 is the subtrahend, so x is not negated
+  %mul = fmul float %sub, %v1 ; expected: SDAG emits v_add_f32 with 0, GISEL emits v_subrev_f32 with 0x80000000; no neg modifier on v_mul_f32
+  ret float %mul
+}
+
+define float @fold_f32_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
+; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul float %sub, %v1 ; SDAG is expected to fold the negation into v_mul_f32's -v0 source modifier; GISEL keeps a separate v_max_f32 -v0, -v0
+  ret float %mul
+}
+
+define float @fold_f32_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
+; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul float %sub, %v1 ; baseline: SDAG still emits v_sub_f32 from 0x80000000 (f32 bit pattern of -0.0), no modifier; GISEL emits v_max_f32 -v0, -v0
+  ret float %mul
+}
+
+define float @fold_f32_fsub_into_fneg_modifier_ieee_nsz(float %v0, float %v1) #0 {
+; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f32_e64 v0, -v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_ieee_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz float -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz float %sub, %v1 ; SDAG is expected to fold the negation into v_mul_f32's -v0 source modifier; GISEL keeps a separate v_max_f32 -v0, -v0
+  ret float %mul
+}
+
+define float @fold_f32_fsub_into_fneg_modifier_daz_nsz(float %v0, float %v1) #1 {
+; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_daz_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz float -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz float %sub, %v1 ; baseline: even with nsz, SDAG still emits v_sub_f32 from 0x80000000 (no modifier); GISEL emits v_max_f32 -v0, -v0
+  ret float %mul
+}
+
+define float @fold_f32_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
+; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul float %sub, %v1 ; baseline: SDAG still emits v_sub_f32 from 0x80000000 (f32 bit pattern of -0.0), no modifier; GISEL emits v_max_f32 -v0, -v0
+  ret float %mul
+}
+
+define float @fold_f32_fsub_into_fneg_modifier_dynamic_nsz(float %v0, float %v1) #2 {
+; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_dynamic_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz float -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz float %sub, %v1 ; baseline: even with nsz, SDAG still emits v_sub_f32 from 0x80000000 (no modifier); GISEL emits v_max_f32 -v0, -v0
+  ret float %mul
+}
+
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee(<2 x float> %v0, <2 x float> %v1) #0 {
+; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f32_e64 v0, -v0, v2
+; SDAG-NEXT:    v_mul_f32_e64 v1, -v1, v3
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0 ; -0.0 - x in each lane: negation spelled as an fsub
+  %mul = fmul <2 x float> %sub, %v1 ; SDAG is expected to fold each lane's negation into a v_mul_f32 neg modifier; GISEL keeps one v_max_f32 per lane
+  ret <2 x float> %mul
+}
+
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz(<2 x float> %v0, <2 x float> %v1) #1 {
+; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0 ; -0.0 - x in each lane: negation spelled as an fsub
+  %mul = fmul <2 x float> %sub, %v1 ; baseline: SDAG still emits one v_sub_f32 from 0x80000000 per lane (no modifier); GISEL emits one v_max_f32 per lane
+  ret <2 x float> %mul
+}
+
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_ieee_nsz(<2 x float> %v0, <2 x float> %v1) #0 {
+; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f32_e64 v0, -v0, v2
+; SDAG-NEXT:    v_mul_f32_e64 v1, -v1, v3
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_ieee_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0 ; -0.0 - x in each lane, with nsz: negation spelled as an fsub
+  %mul = fmul nsz <2 x float> %sub, %v1 ; SDAG is expected to fold each lane's negation into a v_mul_f32 neg modifier; GISEL keeps one v_max_f32 per lane
+  ret <2 x float> %mul
+}
+
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_daz_nsz(<2 x float> %v0, <2 x float> %v1) #1 {
+; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_daz_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0 ; -0.0 - x in each lane, with nsz: negation spelled as an fsub
+  %mul = fmul nsz <2 x float> %sub, %v1 ; baseline: even with nsz, SDAG still emits one v_sub_f32 from 0x80000000 per lane; GISEL emits one v_max_f32 per lane
+  ret <2 x float> %mul
+}
+
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic(<2 x float> %v0, <2 x float> %v1) #2 {
+; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x float> <float -0.0, float -0.0>, %v0 ; -0.0 - x in each lane: negation spelled as an fsub
+  %mul = fmul <2 x float> %sub, %v1 ; baseline: SDAG still emits one v_sub_f32 from 0x80000000 per lane (no modifier); GISEL emits one v_max_f32 per lane
+  ret <2 x float> %mul
+}
+
+define <2 x float> @fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz(<2 x float> %v0, <2 x float> %v1) #2 {
+; SDAG-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mul_f32_e32 v0, v0, v2
+; SDAG-NEXT:    v_mul_f32_e32 v1, v1, v3
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f32_fsub_into_fneg_modifier_dynamic_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_mul_f32_e32 v0, v0, v2
+; GISEL-NEXT:    v_mul_f32_e32 v1, v1, v3
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz <2 x float> <float -0.0, float -0.0>, %v0 ; -0.0 - x in each lane, with nsz: negation spelled as an fsub
+  %mul = fmul nsz <2 x float> %sub, %v1 ; baseline: even with nsz, SDAG still emits one v_sub_f32 from 0x80000000 per lane; GISEL emits one v_max_f32 per lane
+  ret <2 x float> %mul
+}
+
+
+define half @fold_f16_fsub_into_fneg_modifier_ieee(half %v0, half %v1) #0 {
+; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f16_e64 v0, -v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f16_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub half -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul half %sub, %v1 ; SDAG is expected to fold the negation into v_mul_f16's -v0 source modifier; GISEL keeps a separate v_max_f16 -v0, -v0
+  ret half %mul
+}
+
+define half @fold_f16_fsub_into_fneg_modifier_daz(half %v0, half %v1) #1 {
+; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f16_e32 v0, 0x8000, v0
+; SDAG-NEXT:    v_mul_f16_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f16_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub half -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul half %sub, %v1 ; baseline: SDAG still emits v_sub_f16 from 0x8000 (f16 bit pattern of -0.0), no modifier; GISEL emits v_max_f16 -v0, -v0
+  ret half %mul
+}
+
+define half @fold_f16_fsub_into_fneg_modifier_ieee_nsz(half %v0, half %v1) #0 {
+; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_ieee_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f16_e64 v0, -v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_ieee_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f16_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz half -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz half %sub, %v1 ; SDAG is expected to fold the negation into v_mul_f16's -v0 source modifier; GISEL keeps a separate v_max_f16 -v0, -v0
+  ret half %mul
+}
+
+define half @fold_f16_fsub_into_fneg_modifier_daz_nsz(half %v0, half %v1) #1 {
+; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f16_e32 v0, 0x8000, v0
+; SDAG-NEXT:    v_mul_f16_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_daz_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f16_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz half -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz half %sub, %v1 ; baseline: even with nsz, SDAG still emits v_sub_f16 from 0x8000 (no modifier); GISEL emits v_max_f16 -v0, -v0
+  ret half %mul
+}
+
+define half @fold_f16_fsub_into_fneg_modifier_dynamic(half %v0, half %v1) #2 {
+; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f16_e32 v0, 0x8000, v0
+; SDAG-NEXT:    v_mul_f16_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f16_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub half -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul half %sub, %v1 ; baseline: SDAG still emits v_sub_f16 from 0x8000 (f16 bit pattern of -0.0), no modifier; GISEL emits v_max_f16 -v0, -v0
+  ret half %mul
+}
+
+define half @fold_f16_fsub_into_fneg_modifier_dynamic_nsz(half %v0, half %v1) #2 {
+; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f16_e32 v0, 0x8000, v0
+; SDAG-NEXT:    v_mul_f16_e32 v0, v0, v1
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_dynamic_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f16_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mul_f16_e32 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz half -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz half %sub, %v1 ; baseline: even with nsz, SDAG still emits v_sub_f16 from 0x8000 (no modifier); GISEL emits v_max_f16 -v0, -v0
+  ret half %mul
+}
+
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee(<2 x half> %v0, <2 x half> %v1) #0 {
+; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 ; -0.0 - x in each lane: negation spelled as an fsub
+  %mul = fmul <2 x half> %sub, %v1 ; SDAG is expected to fold the negation into v_pk_mul_f16's neg_lo/neg_hi bits for the first source; GISEL keeps a separate v_pk_max_f16
+  ret <2 x half> %mul
+}
+
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz(<2 x half> %v0, <2 x half> %v1) #1 {
+; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 ; -0.0 - x in each lane: negation spelled as an fsub
+  %mul = fmul <2 x half> %sub, %v1 ; SDAG is expected to fold the negation into v_pk_mul_f16's neg_lo/neg_hi bits for the first source; GISEL keeps a separate v_pk_max_f16
+  ret <2 x half> %mul
+}
+
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_ieee_nsz(<2 x half> %v0, <2 x half> %v1) #0 {
+; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_ieee_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0 ; -0.0 - x in each lane, with nsz: negation spelled as an fsub
+  %mul = fmul nsz <2 x half> %sub, %v1 ; SDAG is expected to fold the negation into v_pk_mul_f16's neg_lo/neg_hi bits for the first source; GISEL keeps a separate v_pk_max_f16
+  ret <2 x half> %mul
+}
+
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_daz_nsz(<2 x half> %v0, <2 x half> %v1) #1 {
+; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_daz_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0 ; -0.0 - x in each lane, with nsz: negation spelled as an fsub
+  %mul = fmul nsz <2 x half> %sub, %v1 ; SDAG is expected to fold the negation into v_pk_mul_f16's neg_lo/neg_hi bits for the first source; GISEL keeps a separate v_pk_max_f16
+  ret <2 x half> %mul
+}
+
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic(<2 x half> %v0, <2 x half> %v1) #2 {
+; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0 ; -0.0 - x in each lane: negation spelled as an fsub
+  %mul = fmul <2 x half> %sub, %v1 ; SDAG is expected to fold the negation into v_pk_mul_f16's neg_lo/neg_hi bits for the first source; GISEL keeps a separate v_pk_max_f16
+  ret <2 x half> %mul
+}
+
+define <2 x half> @fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz(<2 x half> %v0, <2 x half> %v1) #2 {
+; SDAG-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_pk_mul_f16 v0, v0, v1 neg_lo:[1,0] neg_hi:[1,0]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f16_fsub_into_fneg_modifier_dynamic_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_pk_max_f16 v0, v0, v0 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_pk_mul_f16 v0, v0, v1
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz <2 x half> <half -0.0, half -0.0>, %v0 ; -0.0 - x in each lane, with nsz: negation spelled as an fsub
+  %mul = fmul nsz <2 x half> %sub, %v1 ; SDAG is expected to fold the negation into v_pk_mul_f16's neg_lo/neg_hi bits for the first source; GISEL keeps a separate v_pk_max_f16
+  ret <2 x half> %mul
+}
+
+define double @fold_f64_fsub_into_fneg_modifier_ieee(double %v0, double %v1) #0 {
+; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub double -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul double %sub, %v1 ; SDAG is expected to emit only v_mul_f64 with a -v[0:1] source modifier; GISEL also negates in v_mul_f64 but first emits an un-negated v_max_f64
+  ret double %mul
+}
+
+define double @fold_f64_fsub_into_fneg_modifier_daz(double %v0, double %v1) #1 {
+; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub double -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul double %sub, %v1 ; SDAG is expected to emit only v_mul_f64 with a -v[0:1] source modifier; GISEL also negates in v_mul_f64 but first emits an un-negated v_max_f64
+  ret double %mul
+}
+
+define double @fold_f64_fsub_into_fneg_modifier_ieee_nsz(double %v0, double %v1) #0 {
+; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_ieee_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_ieee_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz double -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz double %sub, %v1 ; SDAG is expected to emit only v_mul_f64 with a -v[0:1] source modifier; GISEL also negates in v_mul_f64 but first emits an un-negated v_max_f64
+  ret double %mul
+}
+
+define double @fold_f64_fsub_into_fneg_modifier_daz_nsz(double %v0, double %v1) #1 {
+; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_daz_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_daz_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz double -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz double %sub, %v1 ; SDAG is expected to emit only v_mul_f64 with a -v[0:1] source modifier; GISEL also negates in v_mul_f64 but first emits an un-negated v_max_f64
+  ret double %mul
+}
+
+define double @fold_f64_fsub_into_fneg_modifier_dynamic(double %v0, double %v1) #2 {
+; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub double -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %mul = fmul double %sub, %v1 ; SDAG is expected to emit only v_mul_f64 with a -v[0:1] source modifier; GISEL also negates in v_mul_f64 but first emits an un-negated v_max_f64
+  ret double %mul
+}
+
+define double @fold_f64_fsub_into_fneg_modifier_dynamic_nsz(double %v0, double %v1) #2 {
+; SDAG-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic_nsz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f64_fsub_into_fneg_modifier_dynamic_nsz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[0:1], v[0:1], v[0:1]
+; GISEL-NEXT:    v_mul_f64 v[0:1], -v[0:1], v[2:3]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub nsz double -0.0, %v0 ; -0.0 - x with nsz: negation spelled as an fsub
+  %mul = fmul nsz double %sub, %v1 ; SDAG is expected to emit only v_mul_f64 with a -v[0:1] source modifier; GISEL also negates in v_mul_f64 but first emits an un-negated v_max_f64
+  ret double %mul
+}
+
+define float @fold_f32_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, float %v0, float %v1) #0 {
+; SDAG-LABEL: fold_f32_select_user_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, v2, -v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f32_select_user_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %sel = select i1 %cond, float %sub, float %v1 ; user is a select, not an fmul; SDAG is expected to fold the negation into v_cndmask_b32's -v1 operand, GISEL keeps v_max_f32
+  ret float %sel
+}
+
+define float @no_fold_f32_select_user_fsub_into_fneg_modifier_daz(i1 %cond, float %v0, float %v1) #1 {
+; SDAG-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %sel = select i1 %cond, float %sub, float %v1 ; user is a select, not an fmul; SDAG is expected to keep v_sub_f32 from 0x80000000 ahead of a plain v_cndmask_b32, GISEL keeps v_max_f32
+  ret float %sel
+}
+
+define float @no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, float %v0, float %v1) #2 {
+; SDAG-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_sub_f32_e32 v1, 0x80000000, v1
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_select_user_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_max_f32_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %sel = select i1 %cond, float %sub, float %v1 ; user is a select, not an fmul; SDAG is expected to keep v_sub_f32 from 0x80000000 ahead of a plain v_cndmask_b32, GISEL keeps v_max_f32
+  ret float %sel
+}
+
+define half @fold_f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, half %v0, half %v1) #0 {
+; SDAG-LABEL: fold_f16_select_user_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_xor_b32_e32 v1, 0x8000, v1
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f16_select_user_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_max_f16_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub half -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %sel = select i1 %cond, half %sub, half %v1 ; user is a select, not an fmul; SDAG is expected to negate with an integer v_xor_b32 0x8000 (no source modifier on v_cndmask_b32), GISEL keeps v_max_f16
+  ret half %sel
+}
+
+define half @no_fold_f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, half %v0, half %v1) #1 {
+; SDAG-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_sub_f16_e32 v1, 0x8000, v1
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_max_f16_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub half -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %sel = select i1 %cond, half %sub, half %v1 ; user is a select, not an fmul; SDAG is expected to keep v_sub_f16 from 0x8000 ahead of a plain v_cndmask_b32, GISEL keeps v_max_f16
+  ret half %sel
+}
+
+define half @no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, half %v0, half %v1) #2 {
+; SDAG-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_sub_f16_e32 v1, 0x8000, v1
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f16_select_user_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_max_f16_e64 v1, -v1, -v1
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub half -0.0, %v0 ; -0.0 - x: negation spelled as an fsub
+  %sel = select i1 %cond, half %sub, half %v1 ; user is a select, not an fmul; SDAG is expected to keep v_sub_f16 from 0x8000 ahead of a plain v_cndmask_b32, GISEL keeps v_max_f16
+  ret half %sel
+}
+
+; f64: (fsub -0.0, %v0) is the true operand of a select on %cond, under
+; attribute group #0 (denormal-fp-math = ieee,ieee).
+; Recorded output: SDAG XORs 0x80000000 into v2 and then selects each 32-bit
+; half with its own v_cndmask; GlobalISel emits v_max_f64 on v[1:2] with both
+; sources negated before the two v_cndmask instructions.
+; NOTE: %mul holds the select result; no multiply is involved.
+define double @fold_f64_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, double %v0, double %v1) #0 {
+; SDAG-LABEL: fold_f64_select_user_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f64_select_user_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[1:2], -v[1:2], -v[1:2]
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub double -0.0, %v0
+  %mul = select i1 %cond, double %sub, double %v1
+  ret double %mul
+}
+
+; f64: (fsub -0.0, %v0) is the true operand of a select on %cond, under
+; attribute group #1 (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG XORs 0x80000000 into v2 (no v_sub/v_add is emitted,
+; the same instruction sequence as the ieee variant of this test, despite the
+; no_fold_ name); GlobalISel emits v_max_f64 on v[1:2] with both sources
+; negated.
+; NOTE: %mul holds the select result; no multiply is involved.
+define double @no_fold_f64_select_user_fsub_into_fneg_modifier_daz(i1 %cond, double %v0, double %v1) #1 {
+; SDAG-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[1:2], -v[1:2], -v[1:2]
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub double -0.0, %v0
+  %mul = select i1 %cond, double %sub, double %v1
+  ret double %mul
+}
+
+; f64: (fsub -0.0, %v0) is the true operand of a select on %cond, under
+; attribute group #2 (denormal-fp-math = dynamic,dynamic).
+; Recorded output: SDAG XORs 0x80000000 into v2 (no v_sub/v_add is emitted,
+; the same instruction sequence as the ieee variant of this test, despite the
+; no_fold_ name); GlobalISel emits v_max_f64 on v[1:2] with both sources
+; negated.
+; NOTE: %mul holds the select result; no multiply is involved.
+define double @no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, double %v0, double %v1) #2 {
+; SDAG-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_xor_b32_e32 v2, 0x80000000, v2
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; SDAG-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f64_select_user_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[1:2], -v[1:2], -v[1:2]
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v3, v1, vcc
+; GISEL-NEXT:    v_cndmask_b32_e32 v1, v4, v2, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub double -0.0, %v0
+  %mul = select i1 %cond, double %sub, double %v1
+  ret double %mul
+}
+
+; <2 x half>: (fsub <-0.0, -0.0>, %v0) is the true operand of a select on the
+; scalar i1 %cond, under attribute group #0 (denormal-fp-math = ieee,ieee).
+; Recorded output: SDAG XORs 0x80008000 (bits 15 and 31, one sign bit per
+; packed half) into v1; GlobalISel emits v_pk_max_f16 with neg_lo/neg_hi set
+; on both sources.
+; NOTE: %mul holds the select result; no multiply is involved.
+define <2 x half> @fold_v2f16_select_user_fsub_into_fneg_modifier_ieee(i1 %cond, <2 x half> %v0, <2 x half> %v1) #0 {
+; SDAG-LABEL: fold_v2f16_select_user_fsub_into_fneg_modifier_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_v2f16_select_user_fsub_into_fneg_modifier_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
+  %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1
+  ret <2 x half> %mul
+}
+
+; <2 x half>: (fsub <-0.0, -0.0>, %v0) is the true operand of a select on the
+; scalar i1 %cond, under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG XORs 0x80008000 into v1 (the same sequence as the ieee
+; variant of this test, despite the no_fold_ name); GlobalISel emits
+; v_pk_max_f16 with neg_lo/neg_hi set on both sources.
+; NOTE: %mul holds the select result; no multiply is involved.
+define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz(i1 %cond, <2 x half> %v0, <2 x half> %v1) #1 {
+; SDAG-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
+  %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1
+  ret <2 x half> %mul
+}
+
+; <2 x half>: (fsub <-0.0, -0.0>, %v0) is the true operand of a select on the
+; scalar i1 %cond, under attribute group #2
+; (denormal-fp-math = dynamic,dynamic).
+; Recorded output: SDAG XORs 0x80008000 into v1 (the same sequence as the ieee
+; variant of this test, despite the no_fold_ name); GlobalISel emits
+; v_pk_max_f16 with neg_lo/neg_hi set on both sources.
+; NOTE: %mul holds the select result; no multiply is involved.
+define <2 x half> @no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic(i1 %cond, <2 x half> %v0, <2 x half> %v1) #2 {
+; SDAG-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_and_b32_e32 v0, 1, v0
+; SDAG-NEXT:    v_xor_b32_e32 v1, 0x80008000, v1
+; SDAG-NEXT:    v_cmp_eq_u32_e32 vcc, 1, v0
+; SDAG-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_v2f16_select_user_fsub_into_fneg_modifier_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_and_b32_e32 v0, 1, v0
+; GISEL-NEXT:    v_pk_max_f16 v1, v1, v1 neg_lo:[1,1] neg_hi:[1,1]
+; GISEL-NEXT:    v_cmp_ne_u32_e32 vcc, 0, v0
+; GISEL-NEXT:    v_cndmask_b32_e32 v0, v2, v1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub <2 x half> <half -0.0, half -0.0>, %v0
+  %mul = select i1 %cond, <2 x half> %sub, <2 x half> %v1
+  ret <2 x half> %mul
+}
+
+; Constrained-FP form: llvm.experimental.constrained.fsub(-0.0, %v0) feeds
+; llvm.experimental.constrained.fmul, both with round.dynamic and
+; fpexcept.strict, under attribute group #0 (denormal-fp-math = ieee,ieee).
+; Recorded output (shared CHECK prefix, so the same for SDAG and GlobalISel):
+; v_sub_f32 from the literal 0x80000000, then v_mul_f32. Despite the fold_
+; name, this baseline shows no neg source modifier on the multiply.
+; NOTE(review): #0 as defined in this file carries only denormal-fp-math; the
+; LangRef expects strictfp on functions using constrained intrinsics. Confirm
+; whether omitting it here is intentional.
+define float @fold_f32_strict_fsub_into_fneg_modifier_ieee(float %v0, float %v1) #0 {
+; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_ieee:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %mul
+}
+
+; Constrained-FP form: llvm.experimental.constrained.fsub(-0.0, %v0) feeds
+; llvm.experimental.constrained.fmul, both with round.dynamic and
+; fpexcept.strict, under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output (shared CHECK prefix, so the same for SDAG and GlobalISel):
+; v_sub_f32 from the literal 0x80000000, then v_mul_f32. Despite the fold_
+; name, this baseline shows no neg source modifier on the multiply.
+; NOTE(review): #1 as defined in this file carries only denormal-fp-math; the
+; LangRef expects strictfp on functions using constrained intrinsics. Confirm
+; whether omitting it here is intentional.
+define float @fold_f32_strict_fsub_into_fneg_modifier_daz(float %v0, float %v1) #1 {
+; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_daz:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %mul
+}
+
+; Constrained-FP form: llvm.experimental.constrained.fsub(-0.0, %v0) feeds
+; llvm.experimental.constrained.fmul, both with round.dynamic and
+; fpexcept.strict, under attribute group #2
+; (denormal-fp-math = dynamic,dynamic).
+; Recorded output (shared CHECK prefix, so the same for SDAG and GlobalISel):
+; v_sub_f32 from the literal 0x80000000, then v_mul_f32. Despite the fold_
+; name, this baseline shows no neg source modifier on the multiply.
+; NOTE(review): #2 as defined in this file carries only denormal-fp-math; the
+; LangRef expects strictfp on functions using constrained intrinsics. Confirm
+; whether omitting it here is intentional.
+define float @fold_f32_strict_fsub_into_fneg_modifier_dynamic(float %v0, float %v1) #2 {
+; CHECK-LABEL: fold_f32_strict_fsub_into_fneg_modifier_dynamic:
+; CHECK:       ; %bb.0:
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; CHECK-NEXT:    v_mul_f32_e32 v0, v0, v1
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+  %sub = call float @llvm.experimental.constrained.fsub.f32(float -0.0, float %v0, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  %mul = call float @llvm.experimental.constrained.fmul.f32(float %sub, float %v1, metadata !"round.dynamic", metadata !"fpexcept.strict")
+  ret float %mul
+}
+
+; (fsub -0.0, %v0) is tested with llvm.is.fpclass using mask 1 (bit 0, the
+; signaling-NaN class in the LangRef numbering), under attribute group #0
+; (denormal-fp-math = ieee,ieee).
+; Recorded output: SDAG emits v_cmp_class_f32 directly on -v0 with no separate
+; subtract, i.e. the negation currently IS folded despite the no_fold_ name;
+; GlobalISel first emits v_max_f32 with negated sources, then the class test.
+; NOTE(review): presumably folding is undesirable here because an fsub would
+; quiet a signaling NaN while a plain sign flip would not -- confirm.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee(float %v0) #0 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_cmp_class_f32_e64 s[4:5], -v0, 1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, 1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1)
+  ret i1 %class
+}
+
+; (fsub -0.0, %v0) is tested with llvm.is.fpclass using mask 1 (bit 0, the
+; signaling-NaN class in the LangRef numbering), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG keeps a real v_sub_f32 from the literal 0x80000000
+; and runs v_cmp_class_f32 on its result; GlobalISel emits v_max_f32 with
+; negated sources, then the class test.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz(float %v0) #1 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, 1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, 1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1)
+  ret i1 %class
+}
+
+; (fsub -0.0, %v0) is tested with llvm.is.fpclass using mask 1 (bit 0, the
+; signaling-NaN class in the LangRef numbering), under attribute group #2
+; (denormal-fp-math = dynamic,dynamic).
+; Recorded output: SDAG keeps a real v_sub_f32 from the literal 0x80000000
+; and runs v_cmp_class_f32 on its result; GlobalISel emits v_max_f32 with
+; negated sources, then the class test.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic(float %v0) #2 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, 1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_issnan_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_cmp_class_f32_e64 s[4:5], v0, 1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 1)
+  ret i1 %class
+}
+
+; (fsub -0.0, %v0) is tested with llvm.is.fpclass using mask 144 (0x90: bits 4
+; and 7, the negative and positive subnormal classes in the LangRef
+; numbering), under attribute group #0 (denormal-fp-math = ieee,ieee).
+; Recorded output: SDAG materializes 0x90 in v1 and emits v_cmp_class_f32
+; directly on -v0 with no separate subtract, i.e. the negation currently IS
+; folded despite the no_fold_ name; GlobalISel emits v_max_f32 with negated
+; sources before the class test.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee(float %v0) #0 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0x90
+; SDAG-NEXT:    v_cmp_class_f32_e64 s[4:5], -v0, v1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x90
+; GISEL-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144)
+  ret i1 %class
+}
+
+; (fsub -0.0, %v0) is tested with llvm.is.fpclass using mask 144 (0x90: bits 4
+; and 7, the negative and positive subnormal classes in the LangRef
+; numbering), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG keeps a real v_sub_f32 from the literal 0x80000000,
+; loads 0x90 into v1 and runs v_cmp_class_f32 on the subtract result;
+; GlobalISel emits v_max_f32 with negated sources before the class test.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz(float %v0) #1 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0x90
+; SDAG-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x90
+; GISEL-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144)
+  ret i1 %class
+}
+
+; (fsub -0.0, %v0) is tested with llvm.is.fpclass using mask 144 (0x90: bits 4
+; and 7, the negative and positive subnormal classes in the LangRef
+; numbering), under attribute group #2 (denormal-fp-math = dynamic,dynamic).
+; Recorded output: SDAG keeps a real v_sub_f32 from the literal 0x80000000,
+; loads 0x90 into v1 and runs v_cmp_class_f32 on the subtract result;
+; GlobalISel emits v_max_f32 with negated sources before the class test.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic(float %v0) #2 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0x90
+; SDAG-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_isdenormal_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x90
+; GISEL-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.is.fpclass.f32(float %sub, i32 144)
+  ret i1 %class
+}
+
+; (fsub -0.0, %v0) is passed to llvm.amdgcn.class.f32 with a run-time mask
+; (%testmask), under attribute group #0 (denormal-fp-math = ieee,ieee).
+; Recorded output: SDAG emits v_cmp_class_f32 directly on -v0 with no separate
+; subtract, i.e. the negation currently IS folded despite the no_fold_ name;
+; GlobalISel emits v_max_f32 with negated sources before the class test.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_ieee(float %v0, i32 %testmask) #0 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_ieee:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_cmp_class_f32_e64 s[4:5], -v0, v1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_ieee:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask)
+  ret i1 %class
+}
+
+; (fsub -0.0, %v0) is passed to llvm.amdgcn.class.f32 with a run-time mask
+; (%testmask), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG keeps a real v_sub_f32 from the literal 0x80000000
+; and runs v_cmp_class_f32 on its result; GlobalISel emits v_max_f32 with
+; negated sources before the class test.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_daz(float %v0, i32 %testmask) #1 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask)
+  ret i1 %class
+}
+
+; (fsub -0.0, %v0) is passed to llvm.amdgcn.class.f32 with a run-time mask
+; (%testmask), under attribute group #2 (denormal-fp-math = dynamic,dynamic).
+; Recorded output: SDAG keeps a real v_sub_f32 from the literal 0x80000000
+; and runs v_cmp_class_f32 on its result; GlobalISel emits v_max_f32 with
+; negated sources before the class test.
+define i1 @no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic(float %v0, i32 %testmask) #2 {
+; SDAG-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f32_fsub_into_fneg_modifier_class_var_dynamic:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_cmp_class_f32_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %class = call i1 @llvm.amdgcn.class.f32(float %sub, i32 %testmask)
+  ret i1 %class
+}
+
+; f64 variant: (fsub -0.0, %v0) is passed to llvm.amdgcn.class.f64 with a
+; run-time mask (%testmask), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG emits v_cmp_class_f64 directly on -v[0:1] with no
+; separate subtract, i.e. the negation currently IS folded despite the
+; no_fold_ name; GlobalISel emits v_max_f64 with negated sources before the
+; class test.
+define i1 @no_fold_f64_fsub_into_fneg_modifier_class_var_daz(double %v0, i32 %testmask) #1 {
+; SDAG-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_var_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], -v[0:1], v2
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_var_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub double -0.0, %v0
+  %class = call i1 @llvm.amdgcn.class.f64(double %sub, i32 %testmask)
+  ret i1 %class
+}
+
+; f16 variant: (fsub -0.0, %v0) is passed to llvm.amdgcn.class.f16 with a
+; run-time mask (%testmask), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG keeps a real v_sub_f16 from the literal 0x8000 (the
+; f16 bit pattern of -0.0) and runs v_cmp_class_f16 on its result; GlobalISel
+; emits v_max_f16 with negated sources before the class test.
+define i1 @no_fold_f16_fsub_into_fneg_modifier_class_var_daz(half %v0, i32 %testmask) #1 {
+; SDAG-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_var_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f16_e32 v0, 0x8000, v0
+; SDAG-NEXT:    v_cmp_class_f16_e32 vcc, v0, v1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_var_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f16_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_cmp_class_f16_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub half -0.0, %v0
+  %class = call i1 @llvm.amdgcn.class.f16(half %sub, i32 %testmask)
+  ret i1 %class
+}
+
+; f64 variant: (fsub -0.0, %v0) is tested with llvm.is.fpclass.f64 using mask
+; 144 (0x90: bits 4 and 7, the negative and positive subnormal classes in the
+; LangRef numbering), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG loads 0x90 into v2 and emits v_cmp_class_f64 directly
+; on -v[0:1] with no separate subtract, i.e. the negation currently IS folded
+; despite the no_fold_ name; GlobalISel emits v_max_f64 with negated sources
+; before the class test.
+define i1 @no_fold_f64_fsub_into_fneg_modifier_class_daz(double %v0) #1 {
+; SDAG-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_mov_b32_e32 v2, 0x90
+; SDAG-NEXT:    v_cmp_class_f64_e64 s[4:5], -v[0:1], v2
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, s[4:5]
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f64_fsub_into_fneg_modifier_class_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f64 v[0:1], -v[0:1], -v[0:1]
+; GISEL-NEXT:    v_mov_b32_e32 v2, 0x90
+; GISEL-NEXT:    v_cmp_class_f64_e32 vcc, v[0:1], v2
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub double -0.0, %v0
+  %class = call i1 @llvm.is.fpclass.f64(double %sub, i32 144)
+  ret i1 %class
+}
+
+; f16 variant: (fsub -0.0, %v0) is tested with llvm.is.fpclass.f16 using mask
+; 144 (0x90: bits 4 and 7, the negative and positive subnormal classes in the
+; LangRef numbering), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG keeps a real v_sub_f16 from the literal 0x8000, loads
+; 0x90 into v1 and runs v_cmp_class_f16 on the subtract result; GlobalISel
+; emits v_max_f16 with negated sources before the class test.
+define i1 @no_fold_f16_fsub_into_fneg_modifier_class_daz(half %v0) #1 {
+; SDAG-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f16_e32 v0, 0x8000, v0
+; SDAG-NEXT:    v_mov_b32_e32 v1, 0x90
+; SDAG-NEXT:    v_cmp_class_f16_e32 vcc, v0, v1
+; SDAG-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: no_fold_f16_fsub_into_fneg_modifier_class_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f16_e64 v0, -v0, -v0
+; GISEL-NEXT:    v_mov_b32_e32 v1, 0x90
+; GISEL-NEXT:    v_cmp_class_f16_e32 vcc, v0, v1
+; GISEL-NEXT:    v_cndmask_b32_e64 v0, 0, 1, vcc
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub half -0.0, %v0
+  %class = call i1 @llvm.is.fpclass.f16(half %sub, i32 144)
+  ret i1 %class
+}
+
+; amdgpu_gfx function: (fsub -0.0, %v0) is the first operand of
+; llvm.amdgcn.interp.p1 (remaining operands: i32 0, i32 0 and the inreg i32
+; %v1), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; Recorded output: SDAG emits v_sub_f32 from the literal 0x80000000 and
+; GlobalISel emits v_max_f32 with negated sources; both then copy s4
+; (presumably %v1) into m0, emit s_nop 0 and v_interp_p1_f32 on attr0.x.
+; Despite the fold_ name, no neg modifier appears on the interp instruction
+; in this baseline.
+define amdgpu_gfx float @fold_f32_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %v1) #1 {
+; SDAG-LABEL: fold_f32_fsub_into_fneg_modifier_interp_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    s_mov_b32 m0, s4
+; SDAG-NEXT:    s_nop 0
+; SDAG-NEXT:    v_interp_p1_f32_e32 v0, v0, attr0.x
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f32_fsub_into_fneg_modifier_interp_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    s_mov_b32 m0, s4
+; GISEL-NEXT:    s_nop 0
+; GISEL-NEXT:    v_interp_p1_f32_e32 v0, v0, attr0.x
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %p0_0 = call float @llvm.amdgcn.interp.p1(float %sub, i32 0, i32 0, i32 %v1)
+  ret float %p0_0
+}
+
+; amdgpu_gfx function: (fsub -0.0, %v0) is the first operand of
+; llvm.amdgcn.interp.p1.f16 (remaining operands: i32 1, i32 2, i1 0 and the
+; inreg i32 %m0), under attribute group #1
+; (denormal-fp-math = preserve-sign,preserve-sign).
+; The "f16" in the name refers to the intrinsic; the negated value and the
+; return value are both float.
+; Recorded output: SDAG emits v_sub_f32 from the literal 0x80000000 and
+; GlobalISel emits v_max_f32 with negated sources; both then copy s4 into m0,
+; emit s_setreg_imm32_b32 on HW_REG_MODE and v_interp_p1ll_f16 on attr2.y.
+; Despite the fold_ name, no neg modifier appears on the interp instruction
+; in this baseline.
+define amdgpu_gfx float @fold_f16_fsub_into_fneg_modifier_interp_daz(float %v0, i32 inreg %m0) #1 {
+; SDAG-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:
+; SDAG:       ; %bb.0:
+; SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SDAG-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
+; SDAG-NEXT:    s_mov_b32 m0, s4
+; SDAG-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
+; SDAG-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
+; SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GISEL-LABEL: fold_f16_fsub_into_fneg_modifier_interp_daz:
+; GISEL:       ; %bb.0:
+; GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GISEL-NEXT:    v_max_f32_e64 v0, -v0, -v0
+; GISEL-NEXT:    s_mov_b32 m0, s4
+; GISEL-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 2, 2), 3
+; GISEL-NEXT:    v_interp_p1ll_f16 v0, v0, attr2.y
+; GISEL-NEXT:    s_setpc_b64 s[30:31]
+  %sub = fsub float -0.0, %v0
+  %p1_0 = call float @llvm.amdgcn.interp.p1.f16(float %sub, i32 1, i32 2, i1 0, i32 %m0)
+  ret float %p1_0
+}
+
+; Intrinsics called by the tests in this file: constrained fmul/fsub, the
+; generic llvm.is.fpclass and the AMDGPU-specific llvm.amdgcn.class for
+; f32/f64/f16, and the two AMDGPU interpolation intrinsics.
+declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
+declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
+declare i1 @llvm.is.fpclass.f32(float, i32 immarg)
+declare i1 @llvm.amdgcn.class.f32(float, i32)
+declare i1 @llvm.is.fpclass.f64(double, i32 immarg)
+declare i1 @llvm.amdgcn.class.f64(double, i32)
+declare i1 @llvm.is.fpclass.f16(half, i32 immarg)
+declare i1 @llvm.amdgcn.class.f16(half, i32)
+declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32)
+declare float @llvm.amdgcn.interp.p1.f16(float, i32, i32, i1, i32)
+
+; Denormal-handling modes the tests are run under. Each value is a
+; comma-separated pair (per the LangRef: output mode, then input mode).
+;   #0: ieee          -- used by the *_ieee tests
+;   #1: preserve-sign -- used by the *_daz tests
+;   #2: dynamic       -- used by the *_dynamic tests
+attributes #0 = { "denormal-fp-math"="ieee,ieee" }
+attributes #1 = { "denormal-fp-math"="preserve-sign,preserve-sign" }
+attributes #2 = { "denormal-fp-math"="dynamic,dynamic" }


        


More information about the llvm-commits mailing list