[llvm] [AMDGPU] Fix mul combine for MUL24 (PR #79110)

Mon Jan 29 05:55:05 PST 2024

================
@@ -0,0 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Checks that the DAG mul combine can handle a MUL24 with an i32 and an i64
+; operand.
+
+; @test: multiplies the low 8 bits of %x (and i64 ..., 255) by a value that is
+; either 1 or 2 ((%z & 1) + 1, computed in i32 and then zero-extended to i64).
+; The masked i64 value is the first operand of the i64 multiply. The assertions
+; below expect a single v_mul_u32_u24_sdwa selecting BYTE_0 of src0, followed
+; by v1 being set to 0 (presumably the high half of the i64 result).
+; NOTE(review): the two `add ..., 0` instructions look like no-ops; confirm
+; whether they are required to reproduce the original problem.
+define i64 @test(i64 %x, i32 %z) {
+; CHECK-LABEL: test:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_and_b32_e32 v1, 1, v2
+; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
+; CHECK-NEXT:    v_mul_u32_u24_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %a = add i64 %x, 0
+  %b = and i64 %a, 255
+  %c = and i32 %z, 1
+  %d = add nuw nsw i32 %c, 1
+  %e = zext nneg i32 %d to i64
+  %f = mul nuw nsw i64 %b, %e
+  %g = add nuw nsw i64 %f, 0
+  ret i64 %g
+}
+
+define i64 @test_swapped(i64 %x, i32 %z) {
+; CHECK-LABEL: test_swapped:
+; CHECK:       ; %bb.0: ; %entry
+; CHECK-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT:    v_and_b32_e32 v1, 1, v2
+; CHECK-NEXT:    v_add_u32_e32 v1, 1, v1
+; CHECK-NEXT:    v_mul_u32_u24_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0
+; CHECK-NEXT:    v_mov_b32_e32 v1, 0
+; CHECK-NEXT:    s_setpc_b64 s[30:31]
+entry:
+  %a = add i64 %x, 0
+  %b = and i64 %a, 255
+  %c = and i32 %z, 1
+  %d = add nuw nsw i32 %c, 1
+  %e = zext nneg i32 %d to i64
+  %f = mul nuw nsw i64 %e, %b
+  %g = add nuw nsw i64 %f, 0
----------------
arsenm wrote:

Are the adds of 0 really necessary? Can the test merge in with the existing test?

https://github.com/llvm/llvm-project/pull/79110