[llvm] [NVPTX] Improve folding to mad with immediate 1 (PR #93628)
Artem Belevich via llvm-commits
llvm-commits at lists.llvm.org
Tue May 28 18:57:05 PDT 2024
================
@@ -0,0 +1,101 @@
+; RUN: llc < %s -march=nvptx -mcpu=sm_20 -O1 | FileCheck %s
+; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -O1 | FileCheck %s
+; RUN: %if ptxas %{ llc < %s -march=nvptx -mcpu=sm_20 -O1 | %ptxas-verify %}
+; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_20 -O1 | %ptxas-verify %}
+
+; test1: computes (n + 1) * m, with the incremented value as the first
+; operand of the mul. The FileCheck patterns below expect both parameters to
+; be loaded and the add-by-one plus the multiply to be emitted as a single
+; mad.lo.s32 of the form m * n + m, whose result is stored as the return value.
+define i32 @test1(i32 %n, i32 %m) {
+;
+; CHECK: ld.param.u32 %[[N:r[0-9]+]], [test1_param_0];
+; CHECK: ld.param.u32 %[[M:r[0-9]+]], [test1_param_1];
+; CHECK: mad.lo.s32 %[[MAD:r[0-9]+]], %[[M]], %[[N]], %[[M]];
+; CHECK: st.param.b32 [func_retval0+0], %[[MAD]];
+;
+ %add = add i32 %n, 1
+ %mul = mul i32 %add, %m
+ ret i32 %mul
+}
+
+; test1_rev: same computation as m * (n + 1), but with the incremented value
+; as the second operand of the mul. The FileCheck patterns below expect the
+; same single mad.lo.s32 of the form m * n + m regardless of operand order.
+define i32 @test1_rev(i32 %n, i32 %m) {
+;
+; CHECK: ld.param.u32 %[[N:r[0-9]+]], [test1_rev_param_0];
+; CHECK: ld.param.u32 %[[M:r[0-9]+]], [test1_rev_param_1];
+; CHECK: mad.lo.s32 %[[MAD:r[0-9]+]], %[[M]], %[[N]], %[[M]];
+; CHECK: st.param.b32 [func_retval0+0], %[[MAD]];
+;
+ %add = add i32 %n, 1
+ %mul = mul i32 %m, %add
+ ret i32 %mul
+}
+
+; Transpose (mul (select)) if it can then be folded to mad
----------------
Artem-B wrote:
Does it buy us anything?
`mul(m,select(1,n))` will probably perform the same as `select(mul(m,n), m)`, since the critical path will always contain both a `mul` and a `select`, just in a different order.
https://github.com/llvm/llvm-project/pull/93628
More information about the llvm-commits
mailing list