[llvm] [NVPTX] Enhance `mul.wide` and `mad.wide` peepholes (PR #150477)
Justin Fargnoli via llvm-commits
llvm-commits at lists.llvm.org
Fri Jul 25 13:13:26 PDT 2025
================
@@ -0,0 +1,117 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s | FileCheck %s
+
+target triple = "nvptx64-nvidia-cuda"
+
+define i64 @t1(i32 %a, i32 %b, i64 %c) {
+; CHECK-LABEL: t1(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [t1_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [t1_param_1];
+; CHECK-NEXT: ld.param.b64 %rd1, [t1_param_2];
+; CHECK-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
+ %mul = mul i32 %a, %b
+ %sext = sext i32 %mul to i64
+ %add = add i64 %c, %sext
+ ret i64 %add
+}
+
+define i64 @t2(i32 %a, i32 %b, i64 %c) {
+; CHECK-LABEL: t2(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [t2_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [t2_param_1];
+; CHECK-NEXT: ld.param.b64 %rd1, [t2_param_2];
+; CHECK-NEXT: mad.wide.s32 %rd2, %r1, %r2, %rd1;
+; CHECK-NEXT: st.param.b64 [func_retval0], %rd2;
+; CHECK-NEXT: ret;
+ %mul = mul i32 %a, %b
+ %sext = sext i32 %mul to i64
+ %add = add i64 %sext, %c
+ ret i64 %add
+}
+
+define i64 @t3(i32 %a, i32 %b) {
+; CHECK-LABEL: t3(
+; CHECK: {
+; CHECK-NEXT: .reg .b32 %r<3>;
+; CHECK-NEXT: .reg .b64 %rd<3>;
+; CHECK-EMPTY:
+; CHECK-NEXT: // %bb.0:
+; CHECK-NEXT: ld.param.b32 %r1, [t3_param_0];
+; CHECK-NEXT: ld.param.b32 %r2, [t3_param_1];
+; CHECK-NEXT: mov.b64 %rd1, 1;
----------------
justinfargnoli wrote:
Added: https://github.com/llvm/llvm-project/blob/a69e982ac014a5a957c722f1fea62eb26c7dedeb/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td#L999-L1013
https://github.com/llvm/llvm-project/pull/150477
More information about the llvm-commits mailing list