[llvm] 11c3cea - AMDGPU: Override getNegatedExpression constant handling

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 15 01:21:09 PST 2023


Author: Matt Arsenault
Date: 2023-02-15T05:21:00-04:00
New Revision: 11c3cead23783e65fb30e673d62771352078ff05

URL: https://github.com/llvm/llvm-project/commit/11c3cead23783e65fb30e673d62771352078ff05
DIFF: https://github.com/llvm/llvm-project/commit/11c3cead23783e65fb30e673d62771352078ff05.diff

LOG: AMDGPU: Override getNegatedExpression constant handling

Ignore the multiple-use heuristics of the default
implementation, and report cost based on inline immediates. This
is mostly interesting for -0 vs. 0. It gets a few small improvements.
fneg_fadd_0_f16 is a small regression. We could probably avoid that
regression if we handled folding fneg into div_fixup.

Added: 
    

Modified: 
    llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
    llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
    llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
    llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
    llvm/test/CodeGen/AMDGPU/v_pack.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 58a53f1f939d..1b1abc4f39b7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -793,6 +793,13 @@ SDValue AMDGPUTargetLowering::getNegatedExpression(
     NegatibleCost &Cost, unsigned Depth) const {
 
   switch (Op.getOpcode()) {
+  case ISD::ConstantFP: {
+    auto *C = cast<ConstantFPSDNode>(Op);
+    Cost = getConstantNegateCost(C);
+    APFloat V = C->getValueAPF();
+    V.changeSign();
+    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
+  }
   case ISD::FMA:
   case ISD::FMAD: {
     // Negating a fma is not free if it has users without source mods.

diff  --git a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
index f6ac0f6dd2a5..b2a27b913592 100644
--- a/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
+++ b/llvm/test/CodeGen/AMDGPU/fdiv32-to-rcp-folding.ll
@@ -276,10 +276,10 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(ptr addrspace(1) %arg) {
 }
 
 ; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp:
-; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
-; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
-; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
-; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
+; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, s{{[0-9]+}}, s{{[0-9]+}}, -2.0{{$}}
+; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, s{{[0-9]+}}, s{{[0-9]+}}, 2.0{{$}}
+; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0, s{{[0-9]+}}, -2.0{{$}}
+; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0, s{{[0-9]+}}, 2.0{{$}}
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 ; GCN-DENORM-DAG: v_rcp_f32_e32
 
@@ -301,7 +301,7 @@ define amdgpu_kernel void @div_v4_c_by_x_25ulp(ptr addrspace(1) %arg) {
 ; GCN-DENORM-DAG: v_div_fmas_f32
 ; GCN-DENORM-DAG: v_div_fmas_f32
 ; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}
-; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}
+; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, 2.0{{$}}
 
 ; GCN-FLUSH-DAG:  v_rcp_f32_e32
 ; GCN-FLUSH-DAG:  v_rcp_f32_e64

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
index a4cc9522a759..f0d9d03667c5 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.f16.ll
@@ -519,9 +519,9 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
 ; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, s0
 ; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
 ; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
-; SI-NSZ-NEXT:    v_div_scale_f32 v2, s[0:1], v0, v0, 1.0
+; SI-NSZ-NEXT:    v_div_scale_f32 v2, s[0:1], v0, v0, -1.0
 ; SI-NSZ-NEXT:    v_rcp_f32_e32 v3, v2
-; SI-NSZ-NEXT:    v_div_scale_f32 v4, vcc, 1.0, v0, 1.0
+; SI-NSZ-NEXT:    v_div_scale_f32 v4, vcc, -1.0, v0, -1.0
 ; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; SI-NSZ-NEXT:    v_fma_f32 v5, -v2, v3, 1.0
 ; SI-NSZ-NEXT:    v_fma_f32 v3, v5, v3, v3
@@ -531,8 +531,8 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
 ; SI-NSZ-NEXT:    v_fma_f32 v2, -v2, v5, v4
 ; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 ; SI-NSZ-NEXT:    v_div_fmas_f32 v2, v2, v3, v5
-; SI-NSZ-NEXT:    v_div_fixup_f32 v0, v2, v0, 1.0
-; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
+; SI-NSZ-NEXT:    v_div_fixup_f32 v0, v2, v0, -1.0
+; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
 ; SI-NSZ-NEXT:    v_cmp_nlt_f32_e64 vcc, -v0, v1
 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; SI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
@@ -563,9 +563,10 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
 ; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7e00
 ; VI-NSZ-NEXT:    v_rcp_f32_e32 v0, v0
+; VI-NSZ-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
 ; VI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; VI-NSZ-NEXT:    v_div_fixup_f16 v0, v0, s1, 1.0
-; VI-NSZ-NEXT:    v_mul_f16_e32 v0, 0x8000, v0
+; VI-NSZ-NEXT:    v_div_fixup_f16 v0, v0, s1, -1.0
+; VI-NSZ-NEXT:    v_mul_f16_e32 v0, 0, v0
 ; VI-NSZ-NEXT:    v_cmp_nlt_f16_e64 vcc, -v0, s0
 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-NSZ-NEXT:    v_cmp_nlt_f16_e32 vcc, 0, v0
@@ -598,13 +599,15 @@ define amdgpu_ps half @fneg_fadd_0_f16(half inreg %tmp2, half inreg %tmp6, <4 x
 ; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(SKIP_2) | instid1(VALU_DEP_1)
 ; GFX11-NSZ-NEXT:    v_rcp_f32_e32 v0, v0
 ; GFX11-NSZ-NEXT:    s_waitcnt_depctr 0xfff
+; GFX11-NSZ-NEXT:    v_sub_f32_e32 v0, 0x80000000, v0
 ; GFX11-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, v0
-; GFX11-NSZ-NEXT:    v_div_fixup_f16 v0, v0, s1, 1.0
 ; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
-; GFX11-NSZ-NEXT:    v_mul_f16_e32 v0, 0x8000, v0
-; GFX11-NSZ-NEXT:    v_cmp_nlt_f16_e64 s1, -v0, s0
+; GFX11-NSZ-NEXT:    v_div_fixup_f16 v0, v0, s1, -1.0
+; GFX11-NSZ-NEXT:    v_mul_f16_e32 v0, 0, v0
 ; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
+; GFX11-NSZ-NEXT:    v_cmp_nlt_f16_e64 s1, -v0, s0
 ; GFX11-NSZ-NEXT:    v_cndmask_b32_e64 v0, v0, s0, s1
+; GFX11-NSZ-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX11-NSZ-NEXT:    v_cmp_nlt_f16_e32 vcc_lo, 0, v0
 ; GFX11-NSZ-NEXT:    v_cndmask_b32_e64 v0, 0x7e00, 0, vcc_lo
 ; GFX11-NSZ-NEXT:    ; return to shader part epilog
@@ -641,10 +644,10 @@ define amdgpu_ps half @fneg_fadd_0_nsz_f16(half inreg %tmp2, half inreg %tmp6, <
 ; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v0, s1
 ; SI-NSZ-NEXT:    v_cvt_f16_f32_e32 v1, s0
 ; SI-NSZ-NEXT:    v_mov_b32_e32 v2, 0x7fc00000
-; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v0, v0
+; SI-NSZ-NEXT:    v_cvt_f32_f16_e64 v0, -v0
 ; SI-NSZ-NEXT:    v_cvt_f32_f16_e32 v1, v1
 ; SI-NSZ-NEXT:    v_rcp_f32_e32 v0, v0
-; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
+; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
 ; SI-NSZ-NEXT:    v_cmp_nlt_f32_e64 vcc, -v0, v1
 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
 ; SI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0

diff  --git a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
index 30177a5834dd..53f97ad9c34c 100644
--- a/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
+++ b/llvm/test/CodeGen/AMDGPU/fneg-combines.new.ll
@@ -203,9 +203,9 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 ;
 ; SI-NSZ-LABEL: fneg_fadd_0_f32:
 ; SI-NSZ:       ; %bb.0: ; %.entry
-; SI-NSZ-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
+; SI-NSZ-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, -1.0
 ; SI-NSZ-NEXT:    v_rcp_f32_e32 v1, v0
-; SI-NSZ-NEXT:    v_div_scale_f32 v2, vcc, 1.0, s1, 1.0
+; SI-NSZ-NEXT:    v_div_scale_f32 v2, vcc, -1.0, s1, -1.0
 ; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; SI-NSZ-NEXT:    v_fma_f32 v3, -v0, v1, 1.0
 ; SI-NSZ-NEXT:    v_fma_f32 v1, v3, v1, v1
@@ -215,8 +215,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 ; SI-NSZ-NEXT:    v_fma_f32 v0, -v0, v3, v2
 ; SI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 0
 ; SI-NSZ-NEXT:    v_div_fmas_f32 v0, v0, v1, v3
-; SI-NSZ-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
-; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
+; SI-NSZ-NEXT:    v_div_fixup_f32 v0, v0, s1, -1.0
+; SI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
 ; SI-NSZ-NEXT:    v_mov_b32_e32 v1, s0
 ; SI-NSZ-NEXT:    v_cmp_nlt_f32_e64 vcc, -v0, s0
 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v1, vcc
@@ -251,8 +251,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 ;
 ; VI-NSZ-LABEL: fneg_fadd_0_f32:
 ; VI-NSZ:       ; %bb.0: ; %.entry
-; VI-NSZ-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, 1.0
-; VI-NSZ-NEXT:    v_div_scale_f32 v1, vcc, 1.0, s1, 1.0
+; VI-NSZ-NEXT:    v_div_scale_f32 v0, s[2:3], s1, s1, -1.0
+; VI-NSZ-NEXT:    v_div_scale_f32 v1, vcc, -1.0, s1, -1.0
 ; VI-NSZ-NEXT:    v_rcp_f32_e32 v2, v0
 ; VI-NSZ-NEXT:    s_setreg_imm32_b32 hwreg(HW_REG_MODE, 4, 2), 3
 ; VI-NSZ-NEXT:    v_fma_f32 v3, -v0, v2, 1.0
@@ -265,8 +265,8 @@ define amdgpu_ps float @fneg_fadd_0_f32(float inreg %tmp2, float inreg %tmp6, <4
 ; VI-NSZ-NEXT:    v_div_fmas_f32 v0, v0, v2, v3
 ; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s0
 ; VI-NSZ-NEXT:    v_mov_b32_e32 v1, 0x7fc00000
-; VI-NSZ-NEXT:    v_div_fixup_f32 v0, v0, s1, 1.0
-; VI-NSZ-NEXT:    v_mul_f32_e32 v0, 0x80000000, v0
+; VI-NSZ-NEXT:    v_div_fixup_f32 v0, v0, s1, -1.0
+; VI-NSZ-NEXT:    v_mul_f32_e32 v0, 0, v0
 ; VI-NSZ-NEXT:    v_cmp_nlt_f32_e64 vcc, -v0, s0
 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v2, vcc
 ; VI-NSZ-NEXT:    v_cmp_nlt_f32_e32 vcc, 0, v0
@@ -575,32 +575,30 @@ define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6,
 ; SI-SAFE-NEXT:    s_mov_b32 s0, 0
 ; SI-SAFE-NEXT:    ; return to shader part epilog
 ;
-; SI-NSZ-LABEL: fneg_fadd_0_f64:
-; SI-NSZ:       ; %bb.0: ; %.entry
-; SI-NSZ-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
-; SI-NSZ-NEXT:    s_mov_b32 s4, 0
-; SI-NSZ-NEXT:    s_brev_b32 s5, 1
-; SI-NSZ-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; SI-NSZ-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; SI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
-; SI-NSZ-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
-; SI-NSZ-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
-; SI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; SI-NSZ-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
-; SI-NSZ-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
-; SI-NSZ-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
-; SI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
-; SI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
-; SI-NSZ-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
-; SI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[4:5]
-; SI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
-; SI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; SI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
-; SI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
-; SI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
-; SI-NSZ-NEXT:    s_mov_b32 s0, 0
-; SI-NSZ-NEXT:    ; return to shader part epilog
+; GCN-NSZ-LABEL: fneg_fadd_0_f64:
+; GCN-NSZ:       ; %bb.0: ; %.entry
+; GCN-NSZ-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], -1.0
+; GCN-NSZ-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
+; GCN-NSZ-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
+; GCN-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
+; GCN-NSZ-NEXT:    v_div_scale_f64 v[4:5], vcc, -1.0, s[2:3], -1.0
+; GCN-NSZ-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
+; GCN-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
+; GCN-NSZ-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
+; GCN-NSZ-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
+; GCN-NSZ-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
+; GCN-NSZ-NEXT:    v_mov_b32_e32 v2, s1
+; GCN-NSZ-NEXT:    v_mov_b32_e32 v3, s0
+; GCN-NSZ-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], -1.0
+; GCN-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], 0
+; GCN-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
+; GCN-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
+; GCN-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
+; GCN-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
+; GCN-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
+; GCN-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
+; GCN-NSZ-NEXT:    s_mov_b32 s0, 0
+; GCN-NSZ-NEXT:    ; return to shader part epilog
 ;
 ; VI-SAFE-LABEL: fneg_fadd_0_f64:
 ; VI-SAFE:       ; %bb.0: ; %.entry
@@ -628,33 +626,6 @@ define amdgpu_ps double @fneg_fadd_0_f64(double inreg %tmp2, double inreg %tmp6,
 ; VI-SAFE-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
 ; VI-SAFE-NEXT:    s_mov_b32 s0, 0
 ; VI-SAFE-NEXT:    ; return to shader part epilog
-;
-; VI-NSZ-LABEL: fneg_fadd_0_f64:
-; VI-NSZ:       ; %bb.0: ; %.entry
-; VI-NSZ-NEXT:    v_div_scale_f64 v[0:1], s[4:5], s[2:3], s[2:3], 1.0
-; VI-NSZ-NEXT:    v_rcp_f64_e32 v[2:3], v[0:1]
-; VI-NSZ-NEXT:    v_fma_f64 v[4:5], -v[0:1], v[2:3], 1.0
-; VI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[4:5], v[2:3]
-; VI-NSZ-NEXT:    v_div_scale_f64 v[4:5], vcc, 1.0, s[2:3], 1.0
-; VI-NSZ-NEXT:    v_fma_f64 v[6:7], -v[0:1], v[2:3], 1.0
-; VI-NSZ-NEXT:    v_fma_f64 v[2:3], v[2:3], v[6:7], v[2:3]
-; VI-NSZ-NEXT:    v_mul_f64 v[6:7], v[4:5], v[2:3]
-; VI-NSZ-NEXT:    v_fma_f64 v[0:1], -v[0:1], v[6:7], v[4:5]
-; VI-NSZ-NEXT:    v_div_fmas_f64 v[0:1], v[0:1], v[2:3], v[6:7]
-; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
-; VI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
-; VI-NSZ-NEXT:    v_div_fixup_f64 v[0:1], v[0:1], s[2:3], 1.0
-; VI-NSZ-NEXT:    s_mov_b32 s2, 0
-; VI-NSZ-NEXT:    s_brev_b32 s3, 1
-; VI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
-; VI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
-; VI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
-; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
-; VI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
-; VI-NSZ-NEXT:    s_and_b64 s[0:1], vcc, exec
-; VI-NSZ-NEXT:    s_cselect_b32 s1, 0, 0x7ff80000
-; VI-NSZ-NEXT:    s_mov_b32 s0, 0
-; VI-NSZ-NEXT:    ; return to shader part epilog
 .entry:
   %tmp7 = fdiv double 1.000000e+00, %tmp6
   %tmp8 = fmul double 0.000000e+00, %tmp7
@@ -691,14 +662,13 @@ define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %t
 ; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
 ; SI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
 ; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; SI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; SI-NSZ-NEXT:    s_mov_b32 s2, 0
-; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; SI-NSZ-NEXT:    s_brev_b32 s3, 1
-; SI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
+; SI-NSZ-NEXT:    v_mul_f64 v[2:3], v[0:1], -1.0
+; SI-NSZ-NEXT:    v_fma_f64 v[4:5], -s[2:3], v[2:3], -1.0
+; SI-NSZ-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; SI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
-; SI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
+; SI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], 0
 ; SI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
+; SI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; SI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc
 ; SI-NSZ-NEXT:    v_cmp_nlt_f64_e32 vcc, 0, v[0:1]
@@ -714,13 +684,12 @@ define amdgpu_ps double @fneg_fadd_0_nsz_f64(double inreg %tmp2, double inreg %t
 ; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
 ; VI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
 ; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
-; VI-NSZ-NEXT:    v_fma_f64 v[2:3], -s[2:3], v[0:1], 1.0
-; VI-NSZ-NEXT:    s_mov_b32 s2, 0
-; VI-NSZ-NEXT:    s_brev_b32 s3, 1
-; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[2:3], v[0:1], v[0:1]
+; VI-NSZ-NEXT:    v_mul_f64 v[2:3], v[0:1], -1.0
+; VI-NSZ-NEXT:    v_fma_f64 v[4:5], -s[2:3], v[2:3], -1.0
+; VI-NSZ-NEXT:    v_fma_f64 v[0:1], v[4:5], v[0:1], v[2:3]
 ; VI-NSZ-NEXT:    v_mov_b32_e32 v2, s1
 ; VI-NSZ-NEXT:    v_mov_b32_e32 v3, s0
-; VI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], s[2:3]
+; VI-NSZ-NEXT:    v_mul_f64 v[0:1], v[0:1], 0
 ; VI-NSZ-NEXT:    v_cmp_nlt_f64_e64 vcc, -v[0:1], s[0:1]
 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v1, v1, v2, vcc
 ; VI-NSZ-NEXT:    v_cndmask_b32_e32 v0, v0, v3, vcc

diff  --git a/llvm/test/CodeGen/AMDGPU/v_pack.ll b/llvm/test/CodeGen/AMDGPU/v_pack.ll
index 5ec9284c870c..a94542ac1e91 100644
--- a/llvm/test/CodeGen/AMDGPU/v_pack.ll
+++ b/llvm/test/CodeGen/AMDGPU/v_pack.ll
@@ -63,7 +63,7 @@ define amdgpu_kernel void @v_pack_b32_v2f16_sub(ptr addrspace(1) %in0, ptr addrs
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
 ; GCN-NEXT:    global_load_ushort v2, v0, s[2:3] glc dlc
 ; GCN-NEXT:    s_waitcnt vmcnt(0)
-; GCN-NEXT:    v_subrev_f16_e32 v0, 2.0, v1
+; GCN-NEXT:    v_add_f16_e32 v0, -2.0, v1
 ; GCN-NEXT:    v_add_f16_e32 v1, 2.0, v2
 ; GCN-NEXT:    v_pack_b32_f16 v0, v0, v1
 ; GCN-NEXT:    ;;#ASMSTART


        


More information about the llvm-commits mailing list