[llvm] [AMDGPU] [GlobalIsel] Combine Fmul with Select into ldexp instruction. (PR #120104)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 5 22:53:42 PST 2025
================
@@ -3307,488 +3307,458 @@ define amdgpu_ps i32 @s_mul_32_f16(half inreg %x, half inreg %y) {
; --------------------------------------------------------------------
define float @v_mul_f32_select_64_1(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_64_1:
-; GFX9-SDAG: ; %bb.0:
-; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
-; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
-; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
-;
-; GFX9-GISEL-LABEL: v_mul_f32_select_64_1:
-; GFX9-GISEL: ; %bb.0:
-; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
-; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e32 v0, 1.0, v2, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
-; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX9-LABEL: v_mul_f32_select_64_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-SDAG-LABEL: v_mul_f32_select_64_1:
-; GFX10-SDAG: ; %bb.0:
-; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX10-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
-; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f32_select_64_1:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
+ %cond = icmp eq i32 %arg, 0
+ %select.pow2 = select i1 %cond, float 64.0, float 1.0
+ %mul = fmul float %x, %select.pow2
+ ret float %mul
+}
+
+define float @v_mul_f32_select_1_64(i32 %arg, float %x) {
+; GFX9-LABEL: v_mul_f32_select_1_64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX10-GISEL-LABEL: v_mul_f32_select_64_1:
-; GFX10-GISEL: ; %bb.0:
-; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX10-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX10-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
-; GFX10-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
-; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f32_select_1_64:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
+ %cond = icmp eq i32 %arg, 0
+ %select.pow2 = select i1 %cond, float 1.0, float 64.0
+ %mul = fmul float %x, %select.pow2
+ ret float %mul
+}
+
+define float @v_mul_f32_select_n1_n64(i32 %arg, float %x) {
+; GFX9-LABEL: v_mul_f32_select_n1_n64:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-SDAG-LABEL: v_mul_f32_select_64_1:
-; GFX11-SDAG: ; %bb.0:
-; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-SDAG-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-SDAG-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
-; GFX11-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
-; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f32_select_n1_n64:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
+ %cond = icmp eq i32 %arg, 0
+ %select.pow2 = select i1 %cond, float -1.0, float -64.0
+ %mul = fmul float %x, %select.pow2
+ ret float %mul
+}
+
+define float @v_mul_f32_select_n64_n1(i32 %arg, float %x) {
+; GFX9-LABEL: v_mul_f32_select_n64_n1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
+; GFX9-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc
+; GFX9-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
;
-; GFX11-GISEL-LABEL: v_mul_f32_select_64_1:
-; GFX11-GISEL: ; %bb.0:
-; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX11-GISEL-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
-; GFX11-GISEL-NEXT: v_cndmask_b32_e64 v0, 1.0, 0x42800000, vcc_lo
-; GFX11-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
-; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
+; GFX1011-LABEL: v_mul_f32_select_n64_n1:
+; GFX1011: ; %bb.0:
+; GFX1011-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX1011-NEXT: v_cmp_eq_u32_e32 vcc_lo, 0, v0
+; GFX1011-NEXT: v_cndmask_b32_e64 v0, 0, 6, vcc_lo
+; GFX1011-NEXT: v_ldexp_f32 v0, -v1, v0
+; GFX1011-NEXT: s_setpc_b64 s[30:31]
%cond = icmp eq i32 %arg, 0
- %select.pow2 = select i1 %cond, float 64.0, float 1.0
+ %select.pow2 = select i1 %cond, float -64.0, float -1.0
%mul = fmul float %x, %select.pow2
ret float %mul
}
-define float @v_mul_f32_select_1_64(i32 %arg, float %x) {
-; GFX9-SDAG-LABEL: v_mul_f32_select_1_64:
+define float @v_mul_f32_select_128_64(i32 %arg, float %x) {
+; GFX9-SDAG-LABEL: v_mul_f32_select_128_64:
; GFX9-SDAG: ; %bb.0:
; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
; GFX9-SDAG-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 0, vcc
+; GFX9-SDAG-NEXT: v_cndmask_b32_e64 v0, 6, 7, vcc
; GFX9-SDAG-NEXT: v_ldexp_f32 v0, v1, v0
; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
;
-; GFX9-GISEL-LABEL: v_mul_f32_select_1_64:
+; GFX9-GISEL-LABEL: v_mul_f32_select_128_64:
; GFX9-GISEL: ; %bb.0:
; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, 0x42800000
; GFX9-GISEL-NEXT: v_cmp_eq_u32_e32 vcc, 0, v0
-; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, v2, 1.0, vcc
-; GFX9-GISEL-NEXT: v_mul_f32_e32 v0, v1, v0
+; GFX9-GISEL-NEXT: v_cndmask_b32_e64 v0, 0, 1, vcc
+; GFX9-GISEL-NEXT: v_add_u32_e32 v0, 6, v0
----------------
arsenm wrote:
So the pattern mentioned in #121145 only occurs when there is a zext of i1? But it would work if we expanded the zext as a select? I think we should just be more aggressively turning extend of boolean into select so we don't need to special case it.
https://github.com/llvm/llvm-project/pull/120104
More information about the llvm-commits
mailing list