[llvm] [NVPTX] Reland `mad.wide` combine under (default off) CLI option (PR #160214)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 23 22:58:33 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Justin Fargnoli (justinfargnoli)
<details>
<summary>Changes</summary>
Follow-up to #<!-- -->155024 based on reported regressions to important matmul kernels.
---
Patch is 27.36 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/160214.diff
4 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp (+6)
- (modified) llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h (+1)
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (+9-1)
- (modified) llvm/test/CodeGen/NVPTX/combine-wide.ll (+350-214)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
index c70f48af33cf2..b7de0a4554cd3 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp
@@ -38,6 +38,10 @@ static cl::opt<bool>
EnableRsqrtOpt("nvptx-rsqrt-approx-opt", cl::init(true), cl::Hidden,
cl::desc("Enable reciprocal sqrt optimization"));
+static cl::opt<bool> EnableMADWide("nvptx-mad-wide-opt", cl::init(false),
+ cl::Hidden,
+ cl::desc("Enable MAD wide optimization"));
+
/// createNVPTXISelDag - This pass converts a legalized DAG into a
/// NVPTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createNVPTXISelDag(NVPTXTargetMachine &TM,
@@ -84,6 +88,8 @@ bool NVPTXDAGToDAGISel::allowFMA() const {
bool NVPTXDAGToDAGISel::doRsqrtOpt() const { return EnableRsqrtOpt; }
+bool NVPTXDAGToDAGISel::doMADWideOpt() const { return EnableMADWide; }
+
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
void NVPTXDAGToDAGISel::Select(SDNode *N) {
diff --git a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
index 8dcd5362c4512..c912e709d0aa0 100644
--- a/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
+++ b/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h
@@ -45,6 +45,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXDAGToDAGISel : public SelectionDAGISel {
bool useF32FTZ() const;
bool allowFMA() const;
bool doRsqrtOpt() const;
+ bool doMADWideOpt() const;
NVPTXScopes Scopes{};
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 4e38e026e6bda..4e873558b2537 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -114,6 +114,7 @@ def hasArchAccelFeatures : Predicate<"Subtarget->hasArchAccelFeatures()">;
def doF32FTZ : Predicate<"useF32FTZ()">;
def doNoF32FTZ : Predicate<"!useF32FTZ()">;
def doRsqrtOpt : Predicate<"doRsqrtOpt()">;
+def doMADWideOpt : Predicate<"doMADWideOpt()">;
def hasHWROT32 : Predicate<"Subtarget->hasHWROT32()">;
def noHWROT32 : Predicate<"!Subtarget->hasHWROT32()">;
@@ -899,8 +900,15 @@ let Predicates = [hasOptEnabled] in {
defm MAD_LO_S32 : MADInst<"lo.s32", mul, I32RT, I32RT>;
defm MAD_LO_S64 : MADInst<"lo.s64", mul, I64RT, I64RT>;
- // Generating mad.wide causes a regression:
+ // Generating mad.wide causes a regression in some cases:
// https://github.com/llvm/llvm-project/pull/150477#issuecomment-3191367837
+ // Only do so when the user requests it.
+ let Predicates = [doMADWideOpt] in {
+ defm MAD_WIDE_U16 : MADInst<"wide.u16", umul_wide, I32RT, I16RT>;
+ defm MAD_WIDE_S16 : MADInst<"wide.s16", smul_wide, I32RT, I16RT>;
+ defm MAD_WIDE_U32 : MADInst<"wide.u32", umul_wide, I64RT, I32RT>;
+ defm MAD_WIDE_S32 : MADInst<"wide.s32", smul_wide, I64RT, I32RT>;
+ }
}
//-----------------------------------
diff --git a/llvm/test/CodeGen/NVPTX/combine-wide.ll b/llvm/test/CodeGen/NVPTX/combine-wide.ll
index b5948d37c3505..63e0f3789f49f 100644
--- a/llvm/test/CodeGen/NVPTX/combine-wide.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-wide.ll
@@ -1,24 +1,37 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
-; RUN: llc < %s -O1 | FileCheck %s --check-prefixes=CHECK,O1
+; RUN: llc < %s -O1 | FileCheck %s --check-prefixes=CHECK,O1,O1-NO-MAD
+; RUN: llc < %s -O1 -nvptx-mad-wide-opt | FileCheck %s --check-prefixes=CHECK,O1,O1-MAD
; RUN: llc < %s -O0 | FileCheck %s --check-prefixes=CHECK,O0
target triple = "nvptx64-nvidia-cuda"
define i64 @t1(i32 %a, i32 %b, i64 %c) {
-;
-; O1-LABEL: t1(
-; O1: {
-; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<4>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b32 %r1, [t1_param_0];
-; O1-NEXT: ld.param.b32 %r2, [t1_param_1];
-; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
-; O1-NEXT: ld.param.b64 %rd2, [t1_param_2];
-; O1-NEXT: add.s64 %rd3, %rd2, %rd1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd3;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t1(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b32 %r<3>;
+; O1-NO-MAD-NEXT: .reg .b64 %rd<4>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t1_param_0];
+; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t1_param_1];
+; O1-NO-MAD-NEXT: mul.wide.s32 %rd1, %r1, %r2;
+; O1-NO-MAD-NEXT: ld.param.b64 %rd2, [t1_param_2];
+; O1-NO-MAD-NEXT: add.s64 %rd3, %rd2, %rd1;
+; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t1(
+; O1-MAD: {
+; O1-MAD-NEXT: .reg .b32 %r<3>;
+; O1-MAD-NEXT: .reg .b64 %rd<3>;
+; O1-MAD-EMPTY:
+; O1-MAD-NEXT: // %bb.0:
+; O1-MAD-NEXT: ld.param.b32 %r1, [t1_param_0];
+; O1-MAD-NEXT: ld.param.b32 %r2, [t1_param_1];
+; O1-MAD-NEXT: ld.param.b64 %rd1, [t1_param_2];
+; O1-MAD-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1;
+; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-MAD-NEXT: ret;
;
; O0-LABEL: t1(
; O0: {
@@ -41,20 +54,32 @@ define i64 @t1(i32 %a, i32 %b, i64 %c) {
}
define i64 @t2(i32 %a, i32 %b, i64 %c) {
-;
-; O1-LABEL: t2(
-; O1: {
-; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<4>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b32 %r1, [t2_param_0];
-; O1-NEXT: ld.param.b32 %r2, [t2_param_1];
-; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
-; O1-NEXT: ld.param.b64 %rd2, [t2_param_2];
-; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
-; O1-NEXT: st.param.b64 [func_retval0], %rd3;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t2(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b32 %r<3>;
+; O1-NO-MAD-NEXT: .reg .b64 %rd<4>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t2_param_0];
+; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t2_param_1];
+; O1-NO-MAD-NEXT: mul.wide.s32 %rd1, %r1, %r2;
+; O1-NO-MAD-NEXT: ld.param.b64 %rd2, [t2_param_2];
+; O1-NO-MAD-NEXT: add.s64 %rd3, %rd1, %rd2;
+; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t2(
+; O1-MAD: {
+; O1-MAD-NEXT: .reg .b32 %r<3>;
+; O1-MAD-NEXT: .reg .b64 %rd<3>;
+; O1-MAD-EMPTY:
+; O1-MAD-NEXT: // %bb.0:
+; O1-MAD-NEXT: ld.param.b32 %r1, [t2_param_0];
+; O1-MAD-NEXT: ld.param.b32 %r2, [t2_param_1];
+; O1-MAD-NEXT: ld.param.b64 %rd1, [t2_param_2];
+; O1-MAD-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1;
+; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-MAD-NEXT: ret;
;
; O0-LABEL: t2(
; O0: {
@@ -77,19 +102,30 @@ define i64 @t2(i32 %a, i32 %b, i64 %c) {
}
define i64 @t3(i32 %a, i32 %b) {
-;
-; O1-LABEL: t3(
-; O1: {
-; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<3>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b32 %r1, [t3_param_0];
-; O1-NEXT: ld.param.b32 %r2, [t3_param_1];
-; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
-; O1-NEXT: add.s64 %rd2, %rd1, 1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd2;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t3(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b32 %r<3>;
+; O1-NO-MAD-NEXT: .reg .b64 %rd<3>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t3_param_0];
+; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t3_param_1];
+; O1-NO-MAD-NEXT: mul.wide.s32 %rd1, %r1, %r2;
+; O1-NO-MAD-NEXT: add.s64 %rd2, %rd1, 1;
+; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t3(
+; O1-MAD: {
+; O1-MAD-NEXT: .reg .b32 %r<3>;
+; O1-MAD-NEXT: .reg .b64 %rd<2>;
+; O1-MAD-EMPTY:
+; O1-MAD-NEXT: // %bb.0:
+; O1-MAD-NEXT: ld.param.b32 %r1, [t3_param_0];
+; O1-MAD-NEXT: ld.param.b32 %r2, [t3_param_1];
+; O1-MAD-NEXT: mad.wide.s32 %rd1, %r1, %r2, 1;
+; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd1;
+; O1-MAD-NEXT: ret;
;
; O0-LABEL: t3(
; O0: {
@@ -111,19 +147,30 @@ define i64 @t3(i32 %a, i32 %b) {
}
define i64 @t4(i32 %a, i64 %c) {
-;
-; O1-LABEL: t4(
-; O1: {
-; O1-NEXT: .reg .b32 %r<2>;
-; O1-NEXT: .reg .b64 %rd<4>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b32 %r1, [t4_param_0];
-; O1-NEXT: ld.param.b64 %rd1, [t4_param_1];
-; O1-NEXT: mul.wide.s32 %rd2, %r1, 3;
-; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
-; O1-NEXT: st.param.b64 [func_retval0], %rd3;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t4(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b32 %r<2>;
+; O1-NO-MAD-NEXT: .reg .b64 %rd<4>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t4_param_0];
+; O1-NO-MAD-NEXT: ld.param.b64 %rd1, [t4_param_1];
+; O1-NO-MAD-NEXT: mul.wide.s32 %rd2, %r1, 3;
+; O1-NO-MAD-NEXT: add.s64 %rd3, %rd1, %rd2;
+; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t4(
+; O1-MAD: {
+; O1-MAD-NEXT: .reg .b32 %r<2>;
+; O1-MAD-NEXT: .reg .b64 %rd<3>;
+; O1-MAD-EMPTY:
+; O1-MAD-NEXT: // %bb.0:
+; O1-MAD-NEXT: ld.param.b32 %r1, [t4_param_0];
+; O1-MAD-NEXT: ld.param.b64 %rd1, [t4_param_1];
+; O1-MAD-NEXT: mad.wide.s32 %rd2, %r1, 3, %rd1;
+; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-MAD-NEXT: ret;
;
; O0-LABEL: t4(
; O0: {
@@ -145,18 +192,28 @@ define i64 @t4(i32 %a, i64 %c) {
}
define i64 @t4_1(i32 %a, i64 %c) {
-;
-; O1-LABEL: t4_1(
-; O1: {
-; O1-NEXT: .reg .b32 %r<2>;
-; O1-NEXT: .reg .b64 %rd<3>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b32 %r1, [t4_1_param_0];
-; O1-NEXT: mul.wide.s32 %rd1, %r1, 3;
-; O1-NEXT: add.s64 %rd2, %rd1, 5;
-; O1-NEXT: st.param.b64 [func_retval0], %rd2;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t4_1(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b32 %r<2>;
+; O1-NO-MAD-NEXT: .reg .b64 %rd<3>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t4_1_param_0];
+; O1-NO-MAD-NEXT: mul.wide.s32 %rd1, %r1, 3;
+; O1-NO-MAD-NEXT: add.s64 %rd2, %rd1, 5;
+; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t4_1(
+; O1-MAD: {
+; O1-MAD-NEXT: .reg .b32 %r<2>;
+; O1-MAD-NEXT: .reg .b64 %rd<2>;
+; O1-MAD-EMPTY:
+; O1-MAD-NEXT: // %bb.0:
+; O1-MAD-NEXT: ld.param.b32 %r1, [t4_1_param_0];
+; O1-MAD-NEXT: mad.wide.s32 %rd1, %r1, 3, 5;
+; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd1;
+; O1-MAD-NEXT: ret;
;
; O0-LABEL: t4_1(
; O0: {
@@ -177,20 +234,32 @@ define i64 @t4_1(i32 %a, i64 %c) {
}
define i64 @t5(i32 %a, i32 %b, i64 %c) {
-;
-; O1-LABEL: t5(
-; O1: {
-; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<4>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b32 %r1, [t5_param_0];
-; O1-NEXT: ld.param.b32 %r2, [t5_param_1];
-; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2;
-; O1-NEXT: ld.param.b64 %rd2, [t5_param_2];
-; O1-NEXT: add.s64 %rd3, %rd2, %rd1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd3;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t5(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b32 %r<3>;
+; O1-NO-MAD-NEXT: .reg .b64 %rd<4>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t5_param_0];
+; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t5_param_1];
+; O1-NO-MAD-NEXT: mul.wide.u32 %rd1, %r1, %r2;
+; O1-NO-MAD-NEXT: ld.param.b64 %rd2, [t5_param_2];
+; O1-NO-MAD-NEXT: add.s64 %rd3, %rd2, %rd1;
+; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t5(
+; O1-MAD: {
+; O1-MAD-NEXT: .reg .b32 %r<3>;
+; O1-MAD-NEXT: .reg .b64 %rd<3>;
+; O1-MAD-EMPTY:
+; O1-MAD-NEXT: // %bb.0:
+; O1-MAD-NEXT: ld.param.b32 %r1, [t5_param_0];
+; O1-MAD-NEXT: ld.param.b32 %r2, [t5_param_1];
+; O1-MAD-NEXT: ld.param.b64 %rd1, [t5_param_2];
+; O1-MAD-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1;
+; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-MAD-NEXT: ret;
;
; O0-LABEL: t5(
; O0: {
@@ -213,20 +282,32 @@ define i64 @t5(i32 %a, i32 %b, i64 %c) {
}
define i64 @t6(i32 %a, i32 %b, i64 %c) {
-;
-; O1-LABEL: t6(
-; O1: {
-; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<4>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b32 %r1, [t6_param_0];
-; O1-NEXT: ld.param.b32 %r2, [t6_param_1];
-; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2;
-; O1-NEXT: ld.param.b64 %rd2, [t6_param_2];
-; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
-; O1-NEXT: st.param.b64 [func_retval0], %rd3;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t6(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b32 %r<3>;
+; O1-NO-MAD-NEXT: .reg .b64 %rd<4>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b32 %r1, [t6_param_0];
+; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t6_param_1];
+; O1-NO-MAD-NEXT: mul.wide.u32 %rd1, %r1, %r2;
+; O1-NO-MAD-NEXT: ld.param.b64 %rd2, [t6_param_2];
+; O1-NO-MAD-NEXT: add.s64 %rd3, %rd1, %rd2;
+; O1-NO-MAD-NEXT: st.param.b64 [func_retval0], %rd3;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t6(
+; O1-MAD: {
+; O1-MAD-NEXT: .reg .b32 %r<3>;
+; O1-MAD-NEXT: .reg .b64 %rd<3>;
+; O1-MAD-EMPTY:
+; O1-MAD-NEXT: // %bb.0:
+; O1-MAD-NEXT: ld.param.b32 %r1, [t6_param_0];
+; O1-MAD-NEXT: ld.param.b32 %r2, [t6_param_1];
+; O1-MAD-NEXT: ld.param.b64 %rd1, [t6_param_2];
+; O1-MAD-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1;
+; O1-MAD-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-MAD-NEXT: ret;
;
; O0-LABEL: t6(
; O0: {
@@ -249,7 +330,6 @@ define i64 @t6(i32 %a, i32 %b, i64 %c) {
}
define i32 @t7(i16 %a, i16 %b) {
-;
; O1-LABEL: t7(
; O1: {
; O1-NEXT: .reg .b16 %rs<4>;
@@ -281,7 +361,6 @@ define i32 @t7(i16 %a, i16 %b) {
}
define i32 @t8(i16 %a, i16 %b) {
-;
; O1-LABEL: t8(
; O1: {
; O1-NEXT: .reg .b16 %rs<4>;
@@ -313,7 +392,6 @@ define i32 @t8(i16 %a, i16 %b) {
}
define i64 @t9(i32 %a, i32 %b) {
-;
; O1-LABEL: t9(
; O1: {
; O1-NEXT: .reg .b32 %r<4>;
@@ -345,7 +423,6 @@ define i64 @t9(i32 %a, i32 %b) {
}
define i64 @t10(i32 %a, i32 %b) {
-;
; O1-LABEL: t10(
; O1: {
; O1-NEXT: .reg .b32 %r<4>;
@@ -377,7 +454,6 @@ define i64 @t10(i32 %a, i32 %b) {
}
define i32 @t11(i16 %a, i16 %b) {
-;
; O1-LABEL: t11(
; O1: {
; O1-NEXT: .reg .b16 %rs<4>;
@@ -409,7 +485,6 @@ define i32 @t11(i16 %a, i16 %b) {
}
define i32 @t12(i16 %a, i16 %b) {
-;
; O1-LABEL: t12(
; O1: {
; O1-NEXT: .reg .b16 %rs<3>;
@@ -440,7 +515,6 @@ define i32 @t12(i16 %a, i16 %b) {
}
define i64 @t13(i32 %a, i32 %b) {
-;
; O1-LABEL: t13(
; O1: {
; O1-NEXT: .reg .b32 %r<4>;
@@ -472,7 +546,6 @@ define i64 @t13(i32 %a, i32 %b) {
}
define i64 @t14(i32 %a, i32 %b) {
-;
; O1-LABEL: t14(
; O1: {
; O1-NEXT: .reg .b32 %r<3>;
@@ -503,7 +576,6 @@ define i64 @t14(i32 %a, i32 %b) {
}
define i32 @t15(i16 %a, i16 %b) {
-;
; O1-LABEL: t15(
; O1: {
; O1-NEXT: .reg .b16 %rs<3>;
@@ -534,7 +606,6 @@ define i32 @t15(i16 %a, i16 %b) {
}
define i32 @t16(i16 %a, i16 %b) {
-;
; O1-LABEL: t16(
; O1: {
; O1-NEXT: .reg .b16 %rs<4>;
@@ -566,7 +637,6 @@ define i32 @t16(i16 %a, i16 %b) {
}
define i64 @t17(i32 %a, i32 %b) {
-;
; O1-LABEL: t17(
; O1: {
; O1-NEXT: .reg .b32 %r<3>;
@@ -597,7 +667,6 @@ define i64 @t17(i32 %a, i32 %b) {
}
define i64 @t18(i32 %a, i32 %b) {
-;
; O1-LABEL: t18(
; O1: {
; O1-NEXT: .reg .b32 %r<4>;
@@ -629,7 +698,6 @@ define i64 @t18(i32 %a, i32 %b) {
}
define i32 @t19(i16 %a, i16 %b) {
-;
; O1-LABEL: t19(
; O1: {
; O1-NEXT: .reg .b16 %rs<4>;
@@ -661,7 +729,6 @@ define i32 @t19(i16 %a, i16 %b) {
}
define i32 @t20(i16 %a) {
-;
; CHECK-LABEL: t20(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
@@ -679,7 +746,6 @@ define i32 @t20(i16 %a) {
}
define i64 @t21(i32 %a) {
-;
; CHECK-LABEL: t21(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -697,7 +763,6 @@ define i64 @t21(i32 %a) {
}
define i64 @t22(i32 %a) {
-;
; CHECK-LABEL: t22(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -715,7 +780,6 @@ define i64 @t22(i32 %a) {
}
define i32 @t23(i16 %a, i16 %b) {
-;
; CHECK-LABEL: t23(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
@@ -733,7 +797,6 @@ define i32 @t23(i16 %a, i16 %b) {
}
define i32 @t24(i16 %a, i16 %b) {
-;
; O1-LABEL: t24(
; O1: {
; O1-NEXT: .reg .b16 %rs<2>;
@@ -762,7 +825,6 @@ define i32 @t24(i16 %a, i16 %b) {
}
define i64 @t25(i32 %a) {
-;
; CHECK-LABEL: t25(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -780,7 +842,6 @@ define i64 @t25(i32 %a) {
}
define i64 @t26(i32 %a) {
-;
; O1-LABEL: t26(
; O1: {
; O1-NEXT: .reg .b32 %r<2>;
@@ -809,7 +870,6 @@ define i64 @t26(i32 %a) {
}
define i32 @t27(i16 %a, i16 %b) {
-;
; O1-LABEL: t27(
; O1: {
; O1-NEXT: .reg .b16 %rs<2>;
@@ -838,7 +898,6 @@ define i32 @t27(i16 %a, i16 %b) {
}
define i32 @t28(i16 %a, i16 %b) {
-;
; CHECK-LABEL: t28(
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<3>;
@@ -856,7 +915,6 @@ define i32 @t28(i16 %a, i16 %b) {
}
define i64 @t29(i32 %a) {
-;
; O1-LABEL: t29(
; O1: {
; O1-NEXT: .reg .b32 %r<2>;
@@ -885,7 +943,6 @@ define i64 @t29(i32 %a) {
}
define i64 @t30(i32 %a) {
-;
; CHECK-LABEL: t30(
; CHECK: {
; CHECK-NEXT: .reg .b32 %r<3>;
@@ -903,7 +960,6 @@ define i64 @t30(i32 %a) {
}
define i64 @t31(i32 %a, i32 %b) {
-;
; O1-LABEL: t31(
; O1: {
; O1-NEXT: .reg .b32 %r<4>;
@@ -935,20 +991,32 @@ define i64 @t31(i32 %a, i32 %b) {
}
define i32 @t32(i16 %a, i16 %b, i32 %c) {
-;
-; O1-LABEL: t32(
-; O1: {
-; O1-NEXT: .reg .b16 %rs<3>;
-; O1-NEXT: .reg .b32 %r<4>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b16 %rs1, [t32_param_0];
-; O1-NEXT: ld.param.b16 %rs2, [t32_param_1];
-; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
-; O1-NEXT: ld.param.b32 %r2, [t32_param_2];
-; O1-NEXT: add.s32 %r3, %r2, %r1;
-; O1-NEXT: st.param.b32 [func_retval0], %r3;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t32(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b16 %rs<3>;
+; O1-NO-MAD-NEXT: .reg .b32 %r<4>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t32_param_0];
+; O1-NO-MAD-NEXT: ld.param.b16 %rs2, [t32_param_1];
+; O1-NO-MAD-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
+; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t32_param_2];
+; O1-NO-MAD-NEXT: add.s32 %r3, %r2, %r1;
+; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r3;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t32(
+; O1-MAD: {
+; O1-MAD-NEXT: .reg .b16 %rs<3>;
+; O1-MAD-NEXT: .reg .b32 %r<3>;
+; O1-MAD-EMPTY:
+; O1-MAD-NEXT: // %bb.0:
+; O1-MAD-NEXT: ld.param.b16 %rs1, [t32_param_0];
+; O1-MAD-NEXT: ld.param.b16 %rs2, [t32_param_1];
+; O1-MAD-NEXT: ld.param.b32 %r1, [t32_param_2];
+; O1-MAD-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1;
+; O1-MAD-NEXT: st.param.b32 [func_retval0], %r2;
+; O1-MAD-NEXT: ret;
;
; O0-LABEL: t32(
; O0: {
@@ -971,20 +1039,32 @@ define i32 @t32(i16 %a, i16 %b, i32 %c) {
}
define i32 @t33(i16 %a, i16 %b, i32 %c) {
-;
-; O1-LABEL: t33(
-; O1: {
-; O1-NEXT: .reg .b16 %rs<3>;
-; O1-NEXT: .reg .b32 %r<4>;
-; O1-EMPTY:
-; O1-NEXT: // %bb.0:
-; O1-NEXT: ld.param.b16 %rs1, [t33_param_0];
-; O1-NEXT: ld.param.b16 %rs2, [t33_param_1];
-; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
-; O1-NEXT: ld.param.b32 %r2, [t33_param_2];
-; O1-NEXT: add.s32 %r3, %r2, %r1;
-; O1-NEXT: st.param.b32 [func_retval0], %r3;
-; O1-NEXT: ret;
+; O1-NO-MAD-LABEL: t33(
+; O1-NO-MAD: {
+; O1-NO-MAD-NEXT: .reg .b16 %rs<3>;
+; O1-NO-MAD-NEXT: .reg .b32 %r<4>;
+; O1-NO-MAD-EMPTY:
+; O1-NO-MAD-NEXT: // %bb.0:
+; O1-NO-MAD-NEXT: ld.param.b16 %rs1, [t33_param_0];
+; O1-NO-MAD-NEXT: ld.param.b16 %rs2, [t33_param_1];
+; O1-NO-MAD-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
+; O1-NO-MAD-NEXT: ld.param.b32 %r2, [t33_param_2];
+; O1-NO-MAD-NEXT: add.s32 %r3, %r2, %r1;
+; O1-NO-MAD-NEXT: st.param.b32 [func_retval0], %r3;
+; O1-NO-MAD-NEXT: ret;
+;
+; O1-MAD-LABEL: t33(
+; O1-MAD: ...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/160214
More information about the llvm-commits
mailing list