[llvm] [AMDGPU] Fix mul combine for MUL24 (PR #79110)
Pierre van Houtryve via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 23 00:43:04 PST 2024
https://github.com/Pierre-vh created https://github.com/llvm/llvm-project/pull/79110
MUL24 can now return an i64 for i32 operands, but the combine was never updated to handle this case. When rewriting the ADD, zero-extend (or truncate) the reused operand to the result type so that both ADD operands have the same type.
Fixes SWDEV-436654
From ef01c948c7658543661f0a54abd93f893785129d Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 23 Jan 2024 09:42:02 +0100
Subject: [PATCH] [AMDGPU] Fix mul combine for MUL24
MUL24 can now return an i64 for i32 operands, but the combine was never updated to handle this case.
Extend the operand when rewriting the ADD to handle it.
---
llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp | 4 +-
llvm/test/CodeGen/AMDGPU/mul-combine-crash.ll | 47 +++++++++++++++++++
2 files changed, 49 insertions(+), 2 deletions(-)
create mode 100644 llvm/test/CodeGen/AMDGPU/mul-combine-crash.ll
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index 55d95154c75878b..109e86eb4117a2f 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -4246,12 +4246,12 @@ SDValue AMDGPUTargetLowering::performMulCombine(SDNode *N,
// operands, so we have to place the mul in the LHS
if (SDValue MulOper = IsFoldableAdd(N0)) {
SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N1, MulOper);
- return DAG.getNode(ISD::ADD, DL, VT, MulVal, N1);
+ return DAG.getNode(ISD::ADD, DL, VT, MulVal, DAG.getZExtOrTrunc(N1, DL, VT));
}
if (SDValue MulOper = IsFoldableAdd(N1)) {
SDValue MulVal = DAG.getNode(N->getOpcode(), DL, VT, N0, MulOper);
- return DAG.getNode(ISD::ADD, DL, VT, MulVal, N0);
+ return DAG.getNode(ISD::ADD, DL, VT, MulVal, DAG.getZExtOrTrunc(N0, DL, VT));
}
// Skip if already mul24.
diff --git a/llvm/test/CodeGen/AMDGPU/mul-combine-crash.ll b/llvm/test/CodeGen/AMDGPU/mul-combine-crash.ll
new file mode 100644
index 000000000000000..624c8aa859c0939
--- /dev/null
+++ b/llvm/test/CodeGen/AMDGPU/mul-combine-crash.ll
@@ -0,0 +1,47 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck %s
+
+; Checks that the DAG mul combine can handle a MUL24 with a i32 and i64
+; operand.
+
+define i64 @test(i64 %x, i32 %z) {
+; CHECK-LABEL: test:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 0xff, v0
+; CHECK-NEXT: v_and_b32_e32 v2, 1, v2
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %a = add i64 %x, 0
+ %b = and i64 %a, 255
+ %c = and i32 %z, 1
+ %d = add nuw nsw i32 %c, 1
+ %e = zext nneg i32 %d to i64
+ %f = mul nuw nsw i64 %b, %e
+ %g = add nuw nsw i64 %f, 0
+ ret i64 %g
+}
+
+define i64 @test_swapped(i64 %x, i32 %z) {
+; CHECK-LABEL: test_swapped:
+; CHECK: ; %bb.0: ; %entry
+; CHECK-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; CHECK-NEXT: v_and_b32_e32 v0, 0xff, v0
+; CHECK-NEXT: v_and_b32_e32 v2, 1, v2
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: v_mad_u64_u32 v[0:1], s[4:5], v0, v2, v[0:1]
+; CHECK-NEXT: v_mov_b32_e32 v1, 0
+; CHECK-NEXT: s_setpc_b64 s[30:31]
+entry:
+ %a = add i64 %x, 0
+ %b = and i64 %a, 255
+ %c = and i32 %z, 1
+ %d = add nuw nsw i32 %c, 1
+ %e = zext nneg i32 %d to i64
+ %f = mul nuw nsw i64 %e, %b
+ %g = add nuw nsw i64 %f, 0
+ ret i64 %g
+}
More information about the llvm-commits
mailing list