[llvm] [AMDGPU] Form V_MAD_U64_U32 from mul24 (PR #72393)

Matt Arsenault via llvm-commits llvm-commits at lists.llvm.org
Thu Nov 30 21:35:09 PST 2023


================
@@ -6928,7 +6962,119 @@ entry:
   ret <2 x i16> %add0
 }
 
+define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
+; GFX67-LABEL: mul_u24_add64:
+; GFX67:       ; %bb.0:
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX67-NEXT:    v_addc_u32_e32 v1, vcc, v4, v3, vcc
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: mul_u24_add64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
+; GFX8-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v4, v3, vcc
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: mul_u24_add64:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
+; GFX900-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v3, vcc
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: mul_u24_add64:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
+; GFX9-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v3, vcc
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: mul_u24_add64:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: mul_u24_add64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
+; GFX10-NEXT:    v_mul_hi_u32_u24_e32 v1, v0, v1
+; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v4, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
+  %add = add nuw nsw i64 %mul, %z
+  ret i64 %add
+}
+
+define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) {
+; GFX67-LABEL: mul_u24_zext_add64:
+; GFX67:       ; %bb.0:
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX67-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: mul_u24_zext_add64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: mul_u24_zext_add64:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: mul_u24_zext_add64:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: mul_u24_zext_add64:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: mul_u24_zext_add64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y)
+  %mul.zext = zext i32 %mul to i64
+  %add = add nuw nsw i64 %mul.zext, %z
----------------
arsenm wrote:

Drop the flags, pretty sure you don't need them 

https://github.com/llvm/llvm-project/pull/72393


More information about the llvm-commits mailing list