[PATCH] D74444: [NVPTX, LSV] Move the LSV optimization pass to later when the graph is cleaner
Frederic Bastien via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 12:52:33 PST 2020
nouiz added a comment.
Before:
.visible .func foo_complex(
.param .b32 foo_complex_param_0
) // @foo_complex
{
.reg .b16 %rs<4>;
.reg .b32 %r<11>;
// %bb.0:
ld.param.u32 %r1, [foo_complex_param_0];
mov.u32 %r2, %tid.x;
mov.u32 %r3, %ctaid.x;
shl.b32 %r4, %r3, 9;
and.b32 %r5, %r4, -131072;
and.b32 %r6, %r4, 130560;
shl.b32 %r7, %r2, 1;
or.b32 %r8, %r6, %r7;
add.s32 %r9, %r1, %r5;
add.s32 %r10, %r9, %r8;
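ld.u8 %rs1, [%r10+128]; // <-- highlighted: differs from the "After" listing
ld.u8 %rs2, [%r10+129]; // <-- highlighted: differs from the "After" listing
max.u16 %rs3, %rs1, %rs2;
st.u8 [%r10+129], %rs3;
ret;
// -- End function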
}
After:
.visible .func foo_complex(
.param .b32 foo_complex_param_0
) // @foo_complex
{
.reg .b16 %rs<4>;
.reg .b32 %r<11>;
// %bb.0:
ld.param.u32 %r1, [foo_complex_param_0];
mov.u32 %r2, %tid.x;
mov.u32 %r3, %ctaid.x;
shl.b32 %r4, %r3, 9;
and.b32 %r5, %r4, -131072;
and.b32 %r6, %r4, 130560;
shl.b32 %r7, %r2, 1;
or.b32 %r8, %r6, %r7;
add.s32 %r9, %r1, %r5;
add.s32 %r10, %r9, %r8;
ld.v2.u8 {%rs1, %rs2}, [%r10+128]; // <-- highlighted: differs from the "Before" listing
max.u16 %rs3, %rs1, %rs2;
st.u8 [%r10+129], %rs3;
ret;
// -- End function
}
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D74444/new/
https://reviews.llvm.org/D74444
More information about the llvm-commits
mailing list