[llvm] [AMDGPU] Form V_MAD_U64_U32 from mul24 (PR #72393)

Pierre van Houtryve via llvm-commits llvm-commits at lists.llvm.org
Fri Dec 1 01:35:48 PST 2023


https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/72393

>From a6f26366f9240851aa0ecb9a19b8ab952773ba76 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Wed, 15 Nov 2023 15:20:32 +0100
Subject: [PATCH 1/2] [AMDGPU] Form V_MAD_U64_U32 from mul24

See SWDEV-421067
---
 llvm/lib/Target/AMDGPU/AMDGPU.td              |   2 +
 llvm/lib/Target/AMDGPU/VOP3Instructions.td    |  21 +-
 .../CodeGen/AMDGPU/integer-mad-patterns.ll    | 214 +++++++++++++++---
 3 files changed, 201 insertions(+), 36 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/AMDGPU.td b/llvm/lib/Target/AMDGPU/AMDGPU.td
index f7315ecb9fa6495..b1fcc34da1eda7d 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPU.td
+++ b/llvm/lib/Target/AMDGPU/AMDGPU.td
@@ -2009,6 +2009,8 @@ def HasNoCvtFP8VOP1Bug : Predicate<"!Subtarget->hasCvtFP8VOP1Bug()">;
 
 def HasAtomicCSubNoRtnInsts : Predicate<"Subtarget->hasAtomicCSubNoRtnInsts()">;
 
+def HasFullRate64Ops : Predicate<"Subtarget->hasFullRate64Ops()">;
+
 // Include AMDGPU TD files
 include "SISchedule.td"
 include "GCNProcessors.td"
diff --git a/llvm/lib/Target/AMDGPU/VOP3Instructions.td b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
index 114d33b077866a1..3734a226c2bfc22 100644
--- a/llvm/lib/Target/AMDGPU/VOP3Instructions.td
+++ b/llvm/lib/Target/AMDGPU/VOP3Instructions.td
@@ -678,9 +678,26 @@ multiclass IMAD32_Pats <VOP3_Pseudo inst> {
         >;
 }
 
+// Handle cases where amdgpu-codegenprepare-mul24 made a mul24 instead of a normal mul.
+// We need to separate this because otherwise OtherPredicates would be overriden.
+multiclass IMAD32_Mul24_Pats <VOP3_Pseudo inst> {
+  def : GCNPat <
+      (i64 (add (i64 (AMDGPUmul_u24 i32:$src0, i32:$src1)), i64:$src2)),
+      (inst $src0, $src1, $src2, 0 /* clamp */)
+      >;
+  def : GCNPat <
+      (i64 (add (i64 (zext (i32 (AMDGPUmul_u24 i32:$src0, i32:$src1)))), i64:$src2)),
+      (inst $src0, $src1, $src2, 0 /* clamp */)
+      >;
+}
+
 // exclude pre-GFX9 where it was slow
-let OtherPredicates = [HasNotMADIntraFwdBug], SubtargetPredicate = isGFX9Plus in
-  defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
+let SubtargetPredicate = isGFX9Plus in {
+  let OtherPredicates = [HasNotMADIntraFwdBug] in
+    defm : IMAD32_Pats<V_MAD_U64_U32_e64>;
+  let OtherPredicates = [HasNotMADIntraFwdBug, HasFullRate64Ops] in
+    defm : IMAD32_Mul24_Pats<V_MAD_U64_U32_e64>;
+}
 let OtherPredicates = [HasMADIntraFwdBug], SubtargetPredicate = isGFX11Only in
   defm : IMAD32_Pats<V_MAD_U64_U32_gfx11_e64>;
 
diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index 61017e809c86365..f57ace4a75e814c 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -8,8 +8,8 @@
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx803 < %s | FileCheck -check-prefixes=GFX8,GFX8-GISEL %s
 
-; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-SDAG,GFX900-SDAG %s
-; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX900,GFX9-GISEL,GFX900-GISEL %s
+; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG,GFX900-SDAG,GFX900 %s
+; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx900 < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL,GFX900-GISEL,GFX900 %s
 
 ; RUN: llc -global-isel=0 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-SDAG,GFX90A-SDAG %s
 ; RUN: llc -global-isel=1 -mtriple=amdgcn-amd-amdpal -mcpu=gfx90a < %s | FileCheck -check-prefixes=GFX9,GFX90A,GFX9-GISEL,GFX90A-GISEL %s
@@ -4577,23 +4577,41 @@ define i32 @v_multi_use_mul_chain_add_other_use_all(i32 %arg, i32 %arg1, i32 %ar
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v5, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_multi_use_mul_chain_add_other_use_all:
-; GFX900:       ; %bb.0: ; %bb
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_add_u32_e32 v0, 1, v0
-; GFX900-NEXT:    v_mul_lo_u32 v2, v0, v1
-; GFX900-NEXT:    v_add_u32_e32 v0, v2, v0
-; GFX900-NEXT:    v_mul_lo_u32 v0, v0, v1
-; GFX900-NEXT:    v_add_u32_e32 v1, 1, v2
-; GFX900-NEXT:    v_mul_lo_u32 v5, v0, v1
-; GFX900-NEXT:    global_store_dword v[3:4], v2, off
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    global_store_dword v[3:4], v0, off
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    global_store_dword v[3:4], v5, off
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_add_u32_e32 v0, v5, v1
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
+; GFX900-SDAG:       ; %bb.0: ; %bb
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
+; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
+; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
+; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
+; GFX900-SDAG-NEXT:    v_mul_lo_u32 v5, v0, v1
+; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v2, off
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v0, off
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v5, off
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v5, v1
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_all:
+; GFX900-GISEL:       ; %bb.0: ; %bb
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
+; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
+; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
+; GFX900-GISEL-NEXT:    v_mul_lo_u32 v5, v0, v1
+; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v2, off
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v0, off
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v5, off
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v5, v1
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_all:
 ; GFX90A-SDAG:       ; %bb.0: ; %bb
@@ -4761,21 +4779,37 @@ define i32 @v_multi_use_mul_chain_add_other_use_some(i32 %arg, i32 %arg1, i32 %a
 ; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v1
 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 ;
-; GFX900-LABEL: v_multi_use_mul_chain_add_other_use_some:
-; GFX900:       ; %bb.0: ; %bb
-; GFX900-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
-; GFX900-NEXT:    v_add_u32_e32 v0, 1, v0
-; GFX900-NEXT:    v_mul_lo_u32 v2, v0, v1
-; GFX900-NEXT:    v_add_u32_e32 v0, v2, v0
-; GFX900-NEXT:    v_mul_lo_u32 v0, v0, v1
-; GFX900-NEXT:    v_add_u32_e32 v1, 1, v2
-; GFX900-NEXT:    v_mul_lo_u32 v0, v0, v1
-; GFX900-NEXT:    global_store_dword v[3:4], v2, off
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    global_store_dword v[3:4], v0, off
-; GFX900-NEXT:    s_waitcnt vmcnt(0)
-; GFX900-NEXT:    v_add_u32_e32 v0, v0, v1
-; GFX900-NEXT:    s_setpc_b64 s[30:31]
+; GFX900-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
+; GFX900-SDAG:       ; %bb.0: ; %bb
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX900-SDAG-NEXT:    v_mul_lo_u32 v2, v0, v1
+; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v2, v0
+; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
+; GFX900-SDAG-NEXT:    v_add_u32_e32 v1, 1, v2
+; GFX900-SDAG-NEXT:    v_mul_lo_u32 v0, v0, v1
+; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v2, off
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-SDAG-NEXT:    global_store_dword v[3:4], v0, off
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-SDAG-NEXT:    v_add_u32_e32 v0, v0, v1
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-GISEL-LABEL: v_multi_use_mul_chain_add_other_use_some:
+; GFX900-GISEL:       ; %bb.0: ; %bb
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, 1, v0
+; GFX900-GISEL-NEXT:    v_mul_lo_u32 v2, v0, v1
+; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v2, v0
+; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
+; GFX900-GISEL-NEXT:    v_add_u32_e32 v1, 1, v2
+; GFX900-GISEL-NEXT:    v_mul_lo_u32 v0, v0, v1
+; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v2, off
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-GISEL-NEXT:    global_store_dword v[3:4], v0, off
+; GFX900-GISEL-NEXT:    s_waitcnt vmcnt(0)
+; GFX900-GISEL-NEXT:    v_add_u32_e32 v0, v0, v1
+; GFX900-GISEL-NEXT:    s_setpc_b64 s[30:31]
 ;
 ; GFX90A-SDAG-LABEL: v_multi_use_mul_chain_add_other_use_some:
 ; GFX90A-SDAG:       ; %bb.0: ; %bb
@@ -6928,7 +6962,119 @@ entry:
   ret <2 x i16> %add0
 }
 
+define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
+; GFX67-LABEL: mul_u24_add64:
+; GFX67:       ; %bb.0:
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX67-NEXT:    v_addc_u32_e32 v1, vcc, v4, v3, vcc
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: mul_u24_add64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
+; GFX8-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, v4, v3, vcc
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: mul_u24_add64:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
+; GFX900-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v3, vcc
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: mul_u24_add64:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mul_hi_u32_u24_e32 v4, v0, v1
+; GFX9-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, v4, v3, vcc
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: mul_u24_add64:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: mul_u24_add64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mul_u32_u24_e32 v4, v0, v1
+; GFX10-NEXT:    v_mul_hi_u32_u24_e32 v1, v0, v1
+; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v4, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
+  %add = add nuw nsw i64 %mul, %z
+  ret i64 %add
+}
+
+define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) {
+; GFX67-LABEL: mul_u24_zext_add64:
+; GFX67:       ; %bb.0:
+; GFX67-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX67-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX67-NEXT:    v_add_i32_e32 v0, vcc, v0, v2
+; GFX67-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX67-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX8-LABEL: mul_u24_zext_add64:
+; GFX8:       ; %bb.0:
+; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX8-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX8-NEXT:    v_add_u32_e32 v0, vcc, v0, v2
+; GFX8-NEXT:    v_addc_u32_e32 v1, vcc, 0, v3, vcc
+; GFX8-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX900-SDAG-LABEL: mul_u24_zext_add64:
+; GFX900-SDAG:       ; %bb.0:
+; GFX900-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX900-SDAG-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX900-SDAG-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX900-SDAG-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX900-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX9-GISEL-LABEL: mul_u24_zext_add64:
+; GFX9-GISEL:       ; %bb.0:
+; GFX9-GISEL-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX9-GISEL-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX9-GISEL-NEXT:    v_add_co_u32_e32 v0, vcc, v0, v2
+; GFX9-GISEL-NEXT:    v_addc_co_u32_e32 v1, vcc, 0, v3, vcc
+; GFX9-GISEL-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX90A-SDAG-LABEL: mul_u24_zext_add64:
+; GFX90A-SDAG:       ; %bb.0:
+; GFX90A-SDAG-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX90A-SDAG-NEXT:    v_mad_u64_u32 v[0:1], s[4:5], v0, v1, v[2:3]
+; GFX90A-SDAG-NEXT:    s_setpc_b64 s[30:31]
+;
+; GFX10-LABEL: mul_u24_zext_add64:
+; GFX10:       ; %bb.0:
+; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
+; GFX10-NEXT:    v_mul_u32_u24_e32 v0, v0, v1
+; GFX10-NEXT:    v_add_co_u32 v0, vcc_lo, v0, v2
+; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, 0, v3, vcc_lo
+; GFX10-NEXT:    s_setpc_b64 s[30:31]
+  %mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y)
+  %mul.zext = zext i32 %mul to i64
+  %add = add nuw nsw i64 %mul.zext, %z
+  ret i64 %add
+}
+
+declare i64 @llvm.amdgcn.mul.u24.i64(i32, i32)
+declare i32 @llvm.amdgcn.mul.u24(i32, i32)
+
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; GFX6: {{.*}}
 ; GFX7: {{.*}}
+; GFX900: {{.*}}
 ; GFX90A: {{.*}}

>From 50d48c3aadff749262aff1bd3a40ca90d2cfe1a7 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 1 Dec 2023 10:35:31 +0100
Subject: [PATCH 2/2] Drop add flags

---
 llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
index f57ace4a75e814c..d9afe78d0b68100 100644
--- a/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
+++ b/llvm/test/CodeGen/AMDGPU/integer-mad-patterns.ll
@@ -7014,7 +7014,7 @@ define i64 @mul_u24_add64(i32 %x, i32 %y, i64 %z) {
 ; GFX10-NEXT:    v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %mul = call i64 @llvm.amdgcn.mul.u24.i64(i32 %x, i32 %y)
-  %add = add nuw nsw i64 %mul, %z
+  %add = add i64 %mul, %z
   ret i64 %add
 }
 
@@ -7066,7 +7066,7 @@ define i64 @mul_u24_zext_add64(i32 %x, i32 %y, i64 %z) {
 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
   %mul = call i32 @llvm.amdgcn.mul.u24(i32 %x, i32 %y)
   %mul.zext = zext i32 %mul to i64
-  %add = add nuw nsw i64 %mul.zext, %z
+  %add = add i64 %mul.zext, %z
   ret i64 %add
 }
 



More information about the llvm-commits mailing list