[PATCH] D74444: [NVPTX, LSV] Move the LSV optimization pass to later when the graph is cleaner

Frederic Bastien via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Wed Feb 12 12:52:33 PST 2020


nouiz added a comment.

Before:

// foo_complex -- NVPTX output quoted in the message as the "Before" listing.
// Builds an address from the 32-bit parameter plus an offset derived from
// %ctaid.x and %tid.x, loads the bytes at +128 and +129 with two separate
// ld.u8 instructions, takes their unsigned 16-bit max, and stores one byte
// back at +129.
.visible .func foo_complex(
  .param .b32 foo_complex_param_0
)                                       // @foo_complex
{
  .reg .b16       %rs<4>;
  .reg .b32       %r<11>;

// %bb.0:
  ld.param.u32    %r1, [foo_complex_param_0];   // r1 = parameter, used below as the base address
  mov.u32         %r2, %tid.x;
  mov.u32         %r3, %ctaid.x;
  shl.b32         %r4, %r3, 9;                  // r4 = ctaid.x << 9
  and.b32         %r5, %r4, -131072;            // mask 0xFFFE0000: bits 17..31 of r4
  and.b32         %r6, %r4, 130560;             // mask 0x0001FE00: bits 9..16 of r4
  shl.b32         %r7, %r2, 1;                  // r7 = tid.x << 1
  or.b32          %r8, %r6, %r7;
  add.s32         %r9, %r1, %r5;
  add.s32         %r10, %r9, %r8;               // r10 = r1 + r5 + (r6 | r7)

  // The message set the next two loads apart with emphasis markup:
  // two scalar byte loads from adjacent addresses.
  ld.u8           %rs1, [%r10+128];
  ld.u8           %rs2, [%r10+129];
  max.u16         %rs3, %rs1, %rs2;
  st.u8           [%r10+129], %rs3;
  ret;
                                        // -- End function
}

After:

// foo_complex -- NVPTX output quoted in the message as the "After" listing.
// Builds an address from the 32-bit parameter plus an offset derived from
// %ctaid.x and %tid.x, loads the bytes at +128 and +129 with a single
// two-element vector load, takes their unsigned 16-bit max, and stores one
// byte back at +129.
.visible .func foo_complex(
  .param .b32 foo_complex_param_0
)                                       // @foo_complex
{
  .reg .b16       %rs<4>;
  .reg .b32       %r<11>;

// %bb.0:
  ld.param.u32    %r1, [foo_complex_param_0];   // r1 = parameter, used below as the base address
  mov.u32         %r2, %tid.x;
  mov.u32         %r3, %ctaid.x;
  shl.b32         %r4, %r3, 9;                  // r4 = ctaid.x << 9
  and.b32         %r5, %r4, -131072;            // mask 0xFFFE0000: bits 17..31 of r4
  and.b32         %r6, %r4, 130560;             // mask 0x0001FE00: bits 9..16 of r4
  shl.b32         %r7, %r2, 1;                  // r7 = tid.x << 1
  or.b32          %r8, %r6, %r7;
  add.s32         %r9, %r1, %r5;
  add.s32         %r10, %r9, %r8;               // r10 = r1 + r5 + (r6 | r7)

  // The message set the next load apart with emphasis markup:
  // one ld.v2.u8 fills both %rs1 and %rs2 starting at +128.
  ld.v2.u8        {%rs1, %rs2}, [%r10+128];
  max.u16         %rs3, %rs1, %rs2;
  st.u8           [%r10+129], %rs3;
  ret;
                                        // -- End function
}


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D74444/new/

https://reviews.llvm.org/D74444





More information about the llvm-commits mailing list