[llvm] Partial revert "[NVPTX] Enhance mul.wide and mad.wide peepholes (#150477)" (PR #155024)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Aug 22 13:08:36 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-nvptx
Author: Justin Fargnoli (justinfargnoli)
<details>
<summary>Changes</summary>
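Fixes the problem reported in https://github.com/llvm/llvm-project/pull/150477#issuecomment-3191367837 by removing the `MAD_WIDE_*` instruction patterns, so that a `mul.wide` followed by an `add` is no longer folded into a single `mad.wide`.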
---
Full diff: https://github.com/llvm/llvm-project/pull/155024.diff
5 Files Affected:
- (modified) llvm/lib/Target/NVPTX/NVPTXInstrInfo.td (-5)
- (modified) llvm/test/CodeGen/NVPTX/bug26185-2.ll (+5-4)
- (modified) llvm/test/CodeGen/NVPTX/combine-wide.ll (+64-50)
- (modified) llvm/test/CodeGen/NVPTX/local-stack-frame.ll (+4-3)
- (modified) llvm/test/CodeGen/NVPTX/vector-loads.ll (+6-5)
``````````diff
diff --git a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
index 7b135098bd4c1..e81da246e5f44 100644
--- a/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -897,11 +897,6 @@ let Predicates = [hasOptEnabled] in {
defm MAD_LO_S16 : MADInst<"lo.s16", mul, I16RT, I16RT>;
defm MAD_LO_S32 : MADInst<"lo.s32", mul, I32RT, I32RT>;
defm MAD_LO_S64 : MADInst<"lo.s64", mul, I64RT, I64RT>;
-
- defm MAD_WIDE_U16 : MADInst<"wide.u16", umul_wide, I32RT, I16RT>;
- defm MAD_WIDE_S16 : MADInst<"wide.s16", smul_wide, I32RT, I16RT>;
- defm MAD_WIDE_U32 : MADInst<"wide.u32", umul_wide, I64RT, I32RT>;
- defm MAD_WIDE_S32 : MADInst<"wide.s32", smul_wide, I64RT, I32RT>;
}
//-----------------------------------
diff --git a/llvm/test/CodeGen/NVPTX/bug26185-2.ll b/llvm/test/CodeGen/NVPTX/bug26185-2.ll
index 46172b1af1236..4e11f58f85ee0 100644
--- a/llvm/test/CodeGen/NVPTX/bug26185-2.ll
+++ b/llvm/test/CodeGen/NVPTX/bug26185-2.ll
@@ -16,7 +16,7 @@ define ptx_kernel void @spam(ptr addrspace(1) noalias nocapture readonly %arg, p
; CHECK: .maxntid 1, 1, 1
; CHECK-NEXT: {
; CHECK-NEXT: .reg .b32 %r<2>;
-; CHECK-NEXT: .reg .b64 %rd<8>;
+; CHECK-NEXT: .reg .b64 %rd<9>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0: // %bb
; CHECK-NEXT: ld.param.b64 %rd1, [spam_param_0];
@@ -25,9 +25,10 @@ define ptx_kernel void @spam(ptr addrspace(1) noalias nocapture readonly %arg, p
; CHECK-NEXT: add.s64 %rd4, %rd1, %rd3;
; CHECK-NEXT: ld.param.b64 %rd5, [spam_param_1];
; CHECK-NEXT: ld.global.nc.s16 %r1, [%rd4+16];
-; CHECK-NEXT: ld.global.b64 %rd6, [%rd5];
-; CHECK-NEXT: mad.wide.s32 %rd7, %r1, %r1, %rd6;
-; CHECK-NEXT: st.global.b64 [%rd5], %rd7;
+; CHECK-NEXT: mul.wide.s32 %rd6, %r1, %r1;
+; CHECK-NEXT: ld.global.b64 %rd7, [%rd5];
+; CHECK-NEXT: add.s64 %rd8, %rd6, %rd7;
+; CHECK-NEXT: st.global.b64 [%rd5], %rd8;
; CHECK-NEXT: ret;
bb:
%tmp5 = add nsw i64 %arg3, 8
diff --git a/llvm/test/CodeGen/NVPTX/combine-wide.ll b/llvm/test/CodeGen/NVPTX/combine-wide.ll
index ed4a2b6e419c3..b5948d37c3505 100644
--- a/llvm/test/CodeGen/NVPTX/combine-wide.ll
+++ b/llvm/test/CodeGen/NVPTX/combine-wide.ll
@@ -9,14 +9,15 @@ define i64 @t1(i32 %a, i32 %b, i64 %c) {
; O1-LABEL: t1(
; O1: {
; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<3>;
+; O1-NEXT: .reg .b64 %rd<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b32 %r1, [t1_param_0];
; O1-NEXT: ld.param.b32 %r2, [t1_param_1];
-; O1-NEXT: ld.param.b64 %rd1, [t1_param_2];
-; O1-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
+; O1-NEXT: ld.param.b64 %rd2, [t1_param_2];
+; O1-NEXT: add.s64 %rd3, %rd2, %rd1;
+; O1-NEXT: st.param.b64 [func_retval0], %rd3;
; O1-NEXT: ret;
;
; O0-LABEL: t1(
@@ -44,14 +45,15 @@ define i64 @t2(i32 %a, i32 %b, i64 %c) {
; O1-LABEL: t2(
; O1: {
; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<3>;
+; O1-NEXT: .reg .b64 %rd<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b32 %r1, [t2_param_0];
; O1-NEXT: ld.param.b32 %r2, [t2_param_1];
-; O1-NEXT: ld.param.b64 %rd1, [t2_param_2];
-; O1-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
+; O1-NEXT: ld.param.b64 %rd2, [t2_param_2];
+; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
+; O1-NEXT: st.param.b64 [func_retval0], %rd3;
; O1-NEXT: ret;
;
; O0-LABEL: t2(
@@ -79,13 +81,14 @@ define i64 @t3(i32 %a, i32 %b) {
; O1-LABEL: t3(
; O1: {
; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<2>;
+; O1-NEXT: .reg .b64 %rd<3>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b32 %r1, [t3_param_0];
; O1-NEXT: ld.param.b32 %r2, [t3_param_1];
-; O1-NEXT: mad.wide.s32 %rd1, %r1, %r2, 1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd1;
+; O1-NEXT: mul.wide.s32 %rd1, %r1, %r2;
+; O1-NEXT: add.s64 %rd2, %rd1, 1;
+; O1-NEXT: st.param.b64 [func_retval0], %rd2;
; O1-NEXT: ret;
;
; O0-LABEL: t3(
@@ -112,13 +115,14 @@ define i64 @t4(i32 %a, i64 %c) {
; O1-LABEL: t4(
; O1: {
; O1-NEXT: .reg .b32 %r<2>;
-; O1-NEXT: .reg .b64 %rd<3>;
+; O1-NEXT: .reg .b64 %rd<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b32 %r1, [t4_param_0];
; O1-NEXT: ld.param.b64 %rd1, [t4_param_1];
-; O1-NEXT: mad.wide.s32 %rd2, %r1, 3, %rd1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-NEXT: mul.wide.s32 %rd2, %r1, 3;
+; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
+; O1-NEXT: st.param.b64 [func_retval0], %rd3;
; O1-NEXT: ret;
;
; O0-LABEL: t4(
@@ -145,12 +149,13 @@ define i64 @t4_1(i32 %a, i64 %c) {
; O1-LABEL: t4_1(
; O1: {
; O1-NEXT: .reg .b32 %r<2>;
-; O1-NEXT: .reg .b64 %rd<2>;
+; O1-NEXT: .reg .b64 %rd<3>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b32 %r1, [t4_1_param_0];
-; O1-NEXT: mad.wide.s32 %rd1, %r1, 3, 5;
-; O1-NEXT: st.param.b64 [func_retval0], %rd1;
+; O1-NEXT: mul.wide.s32 %rd1, %r1, 3;
+; O1-NEXT: add.s64 %rd2, %rd1, 5;
+; O1-NEXT: st.param.b64 [func_retval0], %rd2;
; O1-NEXT: ret;
;
; O0-LABEL: t4_1(
@@ -176,14 +181,15 @@ define i64 @t5(i32 %a, i32 %b, i64 %c) {
; O1-LABEL: t5(
; O1: {
; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<3>;
+; O1-NEXT: .reg .b64 %rd<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b32 %r1, [t5_param_0];
; O1-NEXT: ld.param.b32 %r2, [t5_param_1];
-; O1-NEXT: ld.param.b64 %rd1, [t5_param_2];
-; O1-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2;
+; O1-NEXT: ld.param.b64 %rd2, [t5_param_2];
+; O1-NEXT: add.s64 %rd3, %rd2, %rd1;
+; O1-NEXT: st.param.b64 [func_retval0], %rd3;
; O1-NEXT: ret;
;
; O0-LABEL: t5(
@@ -211,14 +217,15 @@ define i64 @t6(i32 %a, i32 %b, i64 %c) {
; O1-LABEL: t6(
; O1: {
; O1-NEXT: .reg .b32 %r<3>;
-; O1-NEXT: .reg .b64 %rd<3>;
+; O1-NEXT: .reg .b64 %rd<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b32 %r1, [t6_param_0];
; O1-NEXT: ld.param.b32 %r2, [t6_param_1];
-; O1-NEXT: ld.param.b64 %rd1, [t6_param_2];
-; O1-NEXT: mad.wide.u32 %rd2, %r1, %r2, %rd1;
-; O1-NEXT: st.param.b64 [func_retval0], %rd2;
+; O1-NEXT: mul.wide.u32 %rd1, %r1, %r2;
+; O1-NEXT: ld.param.b64 %rd2, [t6_param_2];
+; O1-NEXT: add.s64 %rd3, %rd1, %rd2;
+; O1-NEXT: st.param.b64 [func_retval0], %rd3;
; O1-NEXT: ret;
;
; O0-LABEL: t6(
@@ -932,14 +939,15 @@ define i32 @t32(i16 %a, i16 %b, i32 %c) {
; O1-LABEL: t32(
; O1: {
; O1-NEXT: .reg .b16 %rs<3>;
-; O1-NEXT: .reg .b32 %r<3>;
+; O1-NEXT: .reg .b32 %r<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b16 %rs1, [t32_param_0];
; O1-NEXT: ld.param.b16 %rs2, [t32_param_1];
-; O1-NEXT: ld.param.b32 %r1, [t32_param_2];
-; O1-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1;
-; O1-NEXT: st.param.b32 [func_retval0], %r2;
+; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
+; O1-NEXT: ld.param.b32 %r2, [t32_param_2];
+; O1-NEXT: add.s32 %r3, %r2, %r1;
+; O1-NEXT: st.param.b32 [func_retval0], %r3;
; O1-NEXT: ret;
;
; O0-LABEL: t32(
@@ -967,14 +975,15 @@ define i32 @t33(i16 %a, i16 %b, i32 %c) {
; O1-LABEL: t33(
; O1: {
; O1-NEXT: .reg .b16 %rs<3>;
-; O1-NEXT: .reg .b32 %r<3>;
+; O1-NEXT: .reg .b32 %r<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b16 %rs1, [t33_param_0];
; O1-NEXT: ld.param.b16 %rs2, [t33_param_1];
-; O1-NEXT: ld.param.b32 %r1, [t33_param_2];
-; O1-NEXT: mad.wide.s16 %r2, %rs1, %rs2, %r1;
-; O1-NEXT: st.param.b32 [func_retval0], %r2;
+; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
+; O1-NEXT: ld.param.b32 %r2, [t33_param_2];
+; O1-NEXT: add.s32 %r3, %r2, %r1;
+; O1-NEXT: st.param.b32 [func_retval0], %r3;
; O1-NEXT: ret;
;
; O0-LABEL: t33(
@@ -1002,13 +1011,14 @@ define i32 @t34(i16 %a, i16 %b) {
; O1-LABEL: t34(
; O1: {
; O1-NEXT: .reg .b16 %rs<3>;
-; O1-NEXT: .reg .b32 %r<2>;
+; O1-NEXT: .reg .b32 %r<3>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b16 %rs1, [t34_param_0];
; O1-NEXT: ld.param.b16 %rs2, [t34_param_1];
-; O1-NEXT: mad.wide.s16 %r1, %rs1, %rs2, 1;
-; O1-NEXT: st.param.b32 [func_retval0], %r1;
+; O1-NEXT: mul.wide.s16 %r1, %rs1, %rs2;
+; O1-NEXT: add.s32 %r2, %r1, 1;
+; O1-NEXT: st.param.b32 [func_retval0], %r2;
; O1-NEXT: ret;
;
; O0-LABEL: t34(
@@ -1035,13 +1045,14 @@ define i32 @t35(i16 %a, i32 %c) {
; O1-LABEL: t35(
; O1: {
; O1-NEXT: .reg .b16 %rs<2>;
-; O1-NEXT: .reg .b32 %r<3>;
+; O1-NEXT: .reg .b32 %r<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b16 %rs1, [t35_param_0];
; O1-NEXT: ld.param.b32 %r1, [t35_param_1];
-; O1-NEXT: mad.wide.s16 %r2, %rs1, 3, %r1;
-; O1-NEXT: st.param.b32 [func_retval0], %r2;
+; O1-NEXT: mul.wide.s16 %r2, %rs1, 3;
+; O1-NEXT: add.s32 %r3, %r1, %r2;
+; O1-NEXT: st.param.b32 [func_retval0], %r3;
; O1-NEXT: ret;
;
; O0-LABEL: t35(
@@ -1068,12 +1079,13 @@ define i32 @t36(i16 %a, i32 %c) {
; O1-LABEL: t36(
; O1: {
; O1-NEXT: .reg .b16 %rs<2>;
-; O1-NEXT: .reg .b32 %r<2>;
+; O1-NEXT: .reg .b32 %r<3>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b16 %rs1, [t36_param_0];
-; O1-NEXT: mad.wide.s16 %r1, %rs1, 3, 5;
-; O1-NEXT: st.param.b32 [func_retval0], %r1;
+; O1-NEXT: mul.wide.s16 %r1, %rs1, 3;
+; O1-NEXT: add.s32 %r2, %r1, 5;
+; O1-NEXT: st.param.b32 [func_retval0], %r2;
; O1-NEXT: ret;
;
; O0-LABEL: t36(
@@ -1099,14 +1111,15 @@ define i32 @t37(i16 %a, i16 %b, i32 %c) {
; O1-LABEL: t37(
; O1: {
; O1-NEXT: .reg .b16 %rs<3>;
-; O1-NEXT: .reg .b32 %r<3>;
+; O1-NEXT: .reg .b32 %r<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b16 %rs1, [t37_param_0];
; O1-NEXT: ld.param.b16 %rs2, [t37_param_1];
-; O1-NEXT: ld.param.b32 %r1, [t37_param_2];
-; O1-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1;
-; O1-NEXT: st.param.b32 [func_retval0], %r2;
+; O1-NEXT: mul.wide.u16 %r1, %rs1, %rs2;
+; O1-NEXT: ld.param.b32 %r2, [t37_param_2];
+; O1-NEXT: add.s32 %r3, %r2, %r1;
+; O1-NEXT: st.param.b32 [func_retval0], %r3;
; O1-NEXT: ret;
;
; O0-LABEL: t37(
@@ -1134,14 +1147,15 @@ define i32 @t38(i16 %a, i16 %b, i32 %c) {
; O1-LABEL: t38(
; O1: {
; O1-NEXT: .reg .b16 %rs<3>;
-; O1-NEXT: .reg .b32 %r<3>;
+; O1-NEXT: .reg .b32 %r<4>;
; O1-EMPTY:
; O1-NEXT: // %bb.0:
; O1-NEXT: ld.param.b16 %rs1, [t38_param_0];
; O1-NEXT: ld.param.b16 %rs2, [t38_param_1];
-; O1-NEXT: ld.param.b32 %r1, [t38_param_2];
-; O1-NEXT: mad.wide.u16 %r2, %rs1, %rs2, %r1;
-; O1-NEXT: st.param.b32 [func_retval0], %r2;
+; O1-NEXT: mul.wide.u16 %r1, %rs1, %rs2;
+; O1-NEXT: ld.param.b32 %r2, [t38_param_2];
+; O1-NEXT: add.s32 %r3, %r1, %r2;
+; O1-NEXT: st.param.b32 [func_retval0], %r3;
; O1-NEXT: ret;
;
; O0-LABEL: t38(
diff --git a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
index f7137e05a5e4f..e89211826a514 100644
--- a/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
+++ b/llvm/test/CodeGen/NVPTX/local-stack-frame.ll
@@ -114,14 +114,15 @@ define void @foo3(i32 %a) {
; PTX64-NEXT: .reg .b64 %SP;
; PTX64-NEXT: .reg .b64 %SPL;
; PTX64-NEXT: .reg .b32 %r<2>;
-; PTX64-NEXT: .reg .b64 %rd<3>;
+; PTX64-NEXT: .reg .b64 %rd<4>;
; PTX64-EMPTY:
; PTX64-NEXT: // %bb.0:
; PTX64-NEXT: mov.b64 %SPL, __local_depot2;
; PTX64-NEXT: ld.param.b32 %r1, [foo3_param_0];
; PTX64-NEXT: add.u64 %rd1, %SPL, 0;
-; PTX64-NEXT: mad.wide.s32 %rd2, %r1, 4, %rd1;
-; PTX64-NEXT: st.local.b32 [%rd2], %r1;
+; PTX64-NEXT: mul.wide.s32 %rd2, %r1, 4;
+; PTX64-NEXT: add.s64 %rd3, %rd1, %rd2;
+; PTX64-NEXT: st.local.b32 [%rd3], %r1;
; PTX64-NEXT: ret;
%local = alloca [3 x i32], align 4
%1 = getelementptr inbounds i32, ptr %local, i32 %a
diff --git a/llvm/test/CodeGen/NVPTX/vector-loads.ll b/llvm/test/CodeGen/NVPTX/vector-loads.ll
index ccac7ff8e6472..1ae6f6bcd748f 100644
--- a/llvm/test/CodeGen/NVPTX/vector-loads.ll
+++ b/llvm/test/CodeGen/NVPTX/vector-loads.ll
@@ -154,7 +154,7 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177
; CHECK: {
; CHECK-NEXT: .reg .b16 %rs<4>;
; CHECK-NEXT: .reg .b32 %r<8>;
-; CHECK-NEXT: .reg .b64 %rd<5>;
+; CHECK-NEXT: .reg .b64 %rd<6>;
; CHECK-EMPTY:
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: ld.param.b64 %rd1, [foo_complex_param_0];
@@ -166,11 +166,12 @@ define void @foo_complex(ptr nocapture readonly align 16 dereferenceable(1342177
; CHECK-NEXT: shl.b32 %r6, %r1, 1;
; CHECK-NEXT: or.b32 %r7, %r5, %r6;
; CHECK-NEXT: cvt.u64.u32 %rd2, %r7;
-; CHECK-NEXT: mad.wide.u32 %rd3, %r3, 131072, %rd1;
-; CHECK-NEXT: add.s64 %rd4, %rd3, %rd2;
-; CHECK-NEXT: ld.v2.b8 {%rs1, %rs2}, [%rd4+128];
+; CHECK-NEXT: mul.wide.u32 %rd3, %r3, 131072;
+; CHECK-NEXT: add.s64 %rd4, %rd1, %rd3;
+; CHECK-NEXT: add.s64 %rd5, %rd4, %rd2;
+; CHECK-NEXT: ld.v2.b8 {%rs1, %rs2}, [%rd5+128];
; CHECK-NEXT: max.u16 %rs3, %rs1, %rs2;
-; CHECK-NEXT: st.b8 [%rd4+129], %rs3;
+; CHECK-NEXT: st.b8 [%rd5+129], %rs3;
; CHECK-NEXT: ret;
%t0 = tail call i32 @llvm.nvvm.read.ptx.sreg.tid.x(), !range !1
%t1 = tail call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
``````````
</details>
https://github.com/llvm/llvm-project/pull/155024
More information about the llvm-commits mailing list