[llvm] [AMDGPU] Select v_lshl_add_u32 instead of v_mul_lo_u32 by constant (PR #71035)
Matt Arsenault via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 2 01:28:26 PDT 2023
================
@@ -2395,6 +2395,95 @@ entry:
ret void
}
+define i32 @mul_pow2_plus_1(i32 %val) {
+; SI-LABEL: mul_pow2_plus_1:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; SI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mul_pow2_plus_1:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: mul_pow2_plus_1:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshl_add_u32 v0, v0, 3, v0
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: mul_pow2_plus_1:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_lshl_add_u32 v0, v0, 3, v0
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: mul_pow2_plus_1:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_lshl_add_u32 v0, v0, 3, v0
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; EG-LABEL: mul_pow2_plus_1:
+; EG: ; %bb.0:
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+ %mul = mul i32 %val, 9 ; 9 == (1 << 3) + 1: SI/VI checks expect shift-by-3 then add, GFX9/GFX10/GFX11 expect one v_lshl_add_u32
+ ret i32 %mul
+}
+
+define i32 @mul_pow2_plus_1_add(i32 %val) {
+; SI-LABEL: mul_pow2_plus_1_add:
+; SI: ; %bb.0:
+; SI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; SI-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, v1, v0
+; SI-NEXT: v_add_i32_e32 v0, vcc, 1, v0
+; SI-NEXT: s_setpc_b64 s[30:31]
+;
+; VI-LABEL: mul_pow2_plus_1_add:
+; VI: ; %bb.0:
+; VI-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; VI-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, v1, v0
+; VI-NEXT: v_add_u32_e32 v0, vcc, 1, v0
+; VI-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX9-LABEL: mul_pow2_plus_1_add:
+; GFX9: ; %bb.0:
+; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GFX9-NEXT: v_add3_u32 v0, v1, v0, 1
+; GFX9-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: mul_pow2_plus_1_add:
+; GFX10: ; %bb.0:
+; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GFX10-NEXT: v_add3_u32 v0, v1, v0, 1
+; GFX10-NEXT: s_setpc_b64 s[30:31]
+;
+; GFX11-LABEL: mul_pow2_plus_1_add:
+; GFX11: ; %bb.0:
+; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX11-NEXT: v_lshlrev_b32_e32 v1, 3, v0
+; GFX11-NEXT: s_delay_alu instid0(VALU_DEP_1)
+; GFX11-NEXT: v_add3_u32 v0, v1, v0, 1
+; GFX11-NEXT: s_setpc_b64 s[30:31]
+;
+; EG-LABEL: mul_pow2_plus_1_add:
+; EG: ; %bb.0:
+; EG-NEXT: CF_END
+; EG-NEXT: PAD
+ %mul = mul i32 %val, 9 ; 9 == (1 << 3) + 1, same multiply as in mul_pow2_plus_1
+ %add = add i32 %mul, 1 ; trailing +1: GFX9/GFX10/GFX11 checks expect shift + v_add3_u32, SI/VI expect a second add
+ ret i32 %add
+}
+
----------------
arsenm wrote:
These tests should also be repeated for the uniform/SGPR cases.
https://github.com/llvm/llvm-project/pull/71035
More information about the llvm-commits mailing list