[llvm] 4c4ff50 - AMDGPU: Add more baseline test for fmul to ldexp patterns
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Wed Aug 23 17:39:13 PDT 2023
Author: Matt Arsenault
Date: 2023-08-23T20:31:54-04:00
New Revision: 4c4ff503617e59cf8ab7146cbe16cd7f146d49b4
URL: https://github.com/llvm/llvm-project/commit/4c4ff503617e59cf8ab7146cbe16cd7f146d49b4
DIFF: https://github.com/llvm/llvm-project/commit/4c4ff503617e59cf8ab7146cbe16cd7f146d49b4.diff
LOG: AMDGPU: Add more baseline test for fmul to ldexp patterns
Added:
Modified:
llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
index db2d6897ee6f09..d592d78a35d575 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fmul.mir
@@ -302,3 +302,87 @@ body: |
SI_RETURN implicit $vgpr0_vgpr1
...
+
+# Baseline: G_FMUL of G_FABS(x) by the constant -16.0.
+# The checks expect a V_MUL_F64_e64 whose src0 modifier immediate is 2
+# (presumably the ABS bit -- confirm against SISrcMods) and whose constant is
+# assembled from two V_MOV_B32 halves. -1070596096 is 0xC0300000, the high
+# dword of double -16.0; the low dword is 0.
+---
+name: fmul_to_ldexp_f64_3
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GCN-LABEL: name: fmul_to_ldexp_f64_3
+ ; GCN: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 2, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]]
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_FABS %0
+ %2:vgpr(s64) = G_FCONSTANT double -16.0
+ %3:vgpr(s64) = G_FMUL %1, %2
+ $vgpr0_vgpr1 = COPY %3
+ SI_RETURN implicit $vgpr0_vgpr1
+
+...
+
+# Baseline: G_FMUL of G_FNEG(G_FABS(x)) by the constant -16.0.
+# The checks expect both source wrappers to fold into the V_MUL_F64_e64 src0
+# modifier immediate, which is 3 here (presumably NEG|ABS -- confirm against
+# SISrcMods). The constant is still materialized as -16.0: high dword
+# -1070596096 (0xC0300000), low dword 0.
+---
+name: fmul_to_ldexp_f64_4
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GCN-LABEL: name: fmul_to_ldexp_f64_4
+ ; GCN: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 3, [[COPY]], 0, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]]
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_FABS %0
+ %2:vgpr(s64) = G_FNEG %1
+ %3:vgpr(s64) = G_FCONSTANT double -16.0
+ %4:vgpr(s64) = G_FMUL %2, %3
+ $vgpr0_vgpr1 = COPY %4
+ SI_RETURN implicit $vgpr0_vgpr1
+
+...
+
+# Baseline: G_FMUL of G_FNEG(G_FABS(x)) by G_FNEG(-16.0).
+# The checks expect the G_FNEG on the constant to be kept as a src1 modifier
+# (immediate 1, presumably the NEG bit -- confirm against SISrcMods) rather
+# than folded into the constant: the materialized value is still -16.0
+# (high dword -1070596096 == 0xC0300000). src0 carries modifier immediate 3.
+---
+name: fmul_to_ldexp_f64_5
+legalized: true
+regBankSelected: true
+body: |
+ bb.0:
+ liveins: $vgpr0_vgpr1
+
+ ; GCN-LABEL: name: fmul_to_ldexp_f64_5
+ ; GCN: liveins: $vgpr0_vgpr1
+ ; GCN-NEXT: {{ $}}
+ ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
+ ; GCN-NEXT: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
+ ; GCN-NEXT: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 -1070596096, implicit $exec
+ ; GCN-NEXT: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_MOV_B32_e32_]], %subreg.sub0, [[V_MOV_B32_e32_1]], %subreg.sub1
+ ; GCN-NEXT: [[V_MUL_F64_e64_:%[0-9]+]]:vreg_64 = nofpexcept V_MUL_F64_e64 3, [[COPY]], 1, [[REG_SEQUENCE]], 0, 0, implicit $mode, implicit $exec
+ ; GCN-NEXT: $vgpr0_vgpr1 = COPY [[V_MUL_F64_e64_]]
+ ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
+ %0:vgpr(s64) = COPY $vgpr0_vgpr1
+ %1:vgpr(s64) = G_FABS %0
+ %2:vgpr(s64) = G_FNEG %1
+ %3:vgpr(s64) = G_FCONSTANT double -16.0
+ %4:vgpr(s64) = G_FNEG %3
+ %5:vgpr(s64) = G_FMUL %2, %4
+ $vgpr0_vgpr1 = COPY %5
+ SI_RETURN implicit $vgpr0_vgpr1
+
+...
diff --git a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
index f4477770b40df4..a5057529f0be50 100644
--- a/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
+++ b/llvm/test/CodeGen/AMDGPU/fmul-to-ldexp.ll
@@ -6830,6 +6830,383 @@ define half @v_constrained_fmul_32_f16(half %x, half %y) #0 {
ret half %val
}
+; Baseline: fabs(x) * 2^-1031, where the multiplier is a denormal f64 constant.
+; The *-SDAG prefixes expect v_ldexp_f64 with |x| and exponent -1031
+; (0xfbf9 via s_movk_i32 on GFX9, the literal 0xfffffbf9 on GFX10/GFX11).
+; The *-GISEL prefixes expect v_mul_f64 by the constant materialized in an
+; SGPR pair: low dword 0, high dword 0x800.
+define double @v_mul_fabs_0x1pn1031_f64(double %x) {
+; GFX9-SDAG-LABEL: v_mul_fabs_0x1pn1031_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: s_movk_i32 s4, 0xfbf9
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, s4
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_movk_i32 s5, 0x800
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_mul_fabs_0x1pn1031_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 0xfffffbf9
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_movk_i32 s5, 0x800
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_0x1pn1031_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 0xfffffbf9
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_0x1pn1031_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_movk_i32 s1, 0x800
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, 4.34584737989687770135e-311
+ ret double %mul
+}
+
+; Baseline: fabs(x) * -256.0.
+; Every listed prefix expects a plain v_mul_f64 of |x| by the constant held in
+; an SGPR pair: low dword 0, high dword 0xc0700000 (the upper half of double
+; -256.0). No ldexp form is expected for this negative power of two.
+define double @v_mul_fabs_neg256_f64(double %x) {
+; GFX9-LABEL: v_mul_fabs_neg256_f64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0
+; GFX9-NEXT: s_mov_b32 s5, 0xc0700000
+; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_mul_fabs_neg256_f64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: s_mov_b32 s5, 0xc0700000
+; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_mul_fabs_neg256_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_mov_b32 s0, 0
+; GFX11-NEXT: s_mov_b32 s1, 0xc0700000
+; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, -256.0
+ ret double %mul
+}
+
+; Baseline: fabs(x) * -8.0.
+; Every listed prefix expects v_mul_f64 of |x| by the constant held in an SGPR
+; pair: low dword 0, high dword 0xc0200000 (the upper half of double -8.0).
+define double @v_mul_fabs_neg8_f64(double %x) {
+; GFX9-LABEL: v_mul_fabs_neg8_f64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0
+; GFX9-NEXT: s_mov_b32 s5, 0xc0200000
+; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_mul_fabs_neg8_f64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: s_mov_b32 s5, 0xc0200000
+; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_mul_fabs_neg8_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_mov_b32 s0, 0
+; GFX11-NEXT: s_mov_b32 s1, 0xc0200000
+; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, -8.0
+ ret double %mul
+}
+
+; Baseline: fabs(x) * -4.0.
+; The shared check prefix expects a single v_mul_f64 of |x| with -4.0 written
+; directly as an operand, so no separate constant materialization is expected.
+define double @v_mul_fabs_neg4_f64(double %x) {
+; GCN-LABEL: v_mul_fabs_neg4_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -4.0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, -4.0
+ ret double %mul
+}
+
+; Baseline: fabs(x) * -2.0.
+; The shared check prefix expects a single v_mul_f64 of |x| with -2.0 written
+; directly as an operand.
+define double @v_mul_fabs_neg2_f64(double %x) {
+; GCN-LABEL: v_mul_fabs_neg2_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -2.0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, -2.0
+ ret double %mul
+}
+
+; Baseline: fabs(x) * -1.0.
+; The *-SDAG prefixes expect no multiply at all, only v_or_b32 of v1 with
+; 0x80000000 (setting the top bit of the high dword). The *-GISEL prefixes
+; expect v_mul_f64 of |x| by -1.0.
+define double @v_mul_fabs_neg1_f64(double %x) {
+; GFX9-SDAG-LABEL: v_mul_fabs_neg1_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_mul_fabs_neg1_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_mul_fabs_neg1_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_neg1_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_neg1_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_or_b32_e32 v1, 0x80000000, v1
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_neg1_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -1.0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, -1.0
+ ret double %mul
+}
+
+; Baseline: fabs(x) * -0.5.
+; The shared check prefix expects a single v_mul_f64 of |x| with -0.5 written
+; directly as an operand.
+define double @v_mul_fabs_neghalf_f64(double %x) {
+; GCN-LABEL: v_mul_fabs_neghalf_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, -0.5
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, -0.5
+ ret double %mul
+}
+
+; Baseline: fabs(x) * -0.25.
+; Every listed prefix expects v_mul_f64 of |x| by the constant held in an SGPR
+; pair: low dword 0, high dword 0xbfd00000 (the upper half of double -0.25).
+define double @v_mul_fabs_negquarter_f64(double %x) {
+; GFX9-LABEL: v_mul_fabs_negquarter_f64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: s_mov_b32 s4, 0
+; GFX9-NEXT: s_mov_b32 s5, 0xbfd00000
+; GFX9-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: v_mul_fabs_negquarter_f64:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: s_mov_b32 s4, 0
+; GFX10-NEXT: s_mov_b32 s5, 0xbfd00000
+; GFX10-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: v_mul_fabs_negquarter_f64:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: s_mov_b32 s0, 0
+; GFX11-NEXT: s_mov_b32 s1, 0xbfd00000
+; GFX11-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, -0.25
+ ret double %mul
+}
+
+; Baseline: fabs(x) * 0.25.
+; The *-SDAG prefixes expect v_ldexp_f64 of |x| with exponent -2. The *-GISEL
+; prefixes expect v_mul_f64 by the constant held in an SGPR pair: low dword 0,
+; high dword 0x3fd00000 (the upper half of double 0.25).
+define double @v_mul_fabs_quarter_f64(double %x) {
+; GFX9-SDAG-LABEL: v_mul_fabs_quarter_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, -2
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_mul_fabs_quarter_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x3fd00000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_mul_fabs_quarter_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, -2
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_quarter_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x3fd00000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_quarter_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, -2
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_quarter_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x3fd00000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, 0.25
+ ret double %mul
+}
+
+; Baseline: fabs(x) * 0.5.
+; The shared check prefix expects a single v_mul_f64 of |x| with 0.5 written
+; directly as an operand.
+define double @v_mul_fabs_half_f64(double %x) {
+; GCN-LABEL: v_mul_fabs_half_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 0.5
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, 0.5
+ ret double %mul
+}
+
+; Baseline: fabs(x) * 1.0.
+; The shared check prefix expects no multiply at all: only v_and_b32 of v1
+; with 0x7fffffff, which clears the top bit of the high dword.
+define double @v_mul_fabs_1_f64(double %x) {
+; GCN-LABEL: v_mul_fabs_1_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, 1.0
+ ret double %mul
+}
+
+; Baseline: fabs(x) * 2.0.
+; The *-SDAG prefixes expect v_add_f64 of |x| with |x|. The *-GISEL prefixes
+; expect v_mul_f64 of |x| by 2.0.
+define double @v_mul_fabs_2_f64(double %x) {
+; GFX9-SDAG-LABEL: v_mul_fabs_2_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_add_f64 v[0:1], |v[0:1]|, |v[0:1]|
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_mul_fabs_2_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 2.0
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_mul_fabs_2_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_add_f64 v[0:1], |v[0:1]|, |v[0:1]|
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_2_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 2.0
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_2_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_add_f64 v[0:1], |v[0:1]|, |v[0:1]|
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_2_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 2.0
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, 2.0
+ ret double %mul
+}
+
+; Baseline: fabs(x) * 4.0.
+; The shared check prefix expects a single v_mul_f64 of |x| with 4.0 written
+; directly as an operand.
+define double @v_mul_fabs_4_f64(double %x) {
+; GCN-LABEL: v_mul_fabs_4_f64:
+; GCN: ; %bb.0:
+; GCN-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GCN-NEXT: v_mul_f64 v[0:1], |v[0:1]|, 4.0
+; GCN-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, 4.0
+ ret double %mul
+}
+
+; Baseline: fabs(x) * 8.0.
+; The *-SDAG prefixes expect v_ldexp_f64 of |x| with exponent 3. The *-GISEL
+; prefixes expect v_mul_f64 by the constant held in an SGPR pair: low dword 0,
+; high dword 0x40200000 (the upper half of double 8.0).
+define double @v_mul_fabs_8_f64(double %x) {
+; GFX9-SDAG-LABEL: v_mul_fabs_8_f64:
+; GFX9-SDAG: ; %bb.0:
+; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 3
+; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: v_mul_fabs_8_f64:
+; GFX9-GISEL: ; %bb.0:
+; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX9-GISEL-NEXT: s_mov_b32 s5, 0x40200000
+; GFX9-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-SDAG-LABEL: v_mul_fabs_8_f64:
+; GFX10-SDAG: ; %bb.0:
+; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 3
+; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-GISEL-LABEL: v_mul_fabs_8_f64:
+; GFX10-GISEL: ; %bb.0:
+; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-GISEL-NEXT: s_mov_b32 s4, 0
+; GFX10-GISEL-NEXT: s_mov_b32 s5, 0x40200000
+; GFX10-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[4:5]
+; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-SDAG-LABEL: v_mul_fabs_8_f64:
+; GFX11-SDAG: ; %bb.0:
+; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-SDAG-NEXT: v_ldexp_f64 v[0:1], |v[0:1]|, 3
+; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-GISEL-LABEL: v_mul_fabs_8_f64:
+; GFX11-GISEL: ; %bb.0:
+; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-GISEL-NEXT: s_mov_b32 s0, 0
+; GFX11-GISEL-NEXT: s_mov_b32 s1, 0x40200000
+; GFX11-GISEL-NEXT: v_mul_f64 v[0:1], |v[0:1]|, s[0:1]
+; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+ %fabs.x = call double @llvm.fabs.f64(double %x)
+ %mul = fmul double %fabs.x, 8.0
+ ret double %mul
+}
+
declare half @llvm.experimental.constrained.fmul.f16(half, half, metadata, metadata)
declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
More information about the llvm-commits mailing list