[PATCH] Add NVPTXPeephole pass to reduce unnecessary address cast

Samuel Antao sfantao at us.ibm.com
Mon Jun 29 21:17:58 PDT 2015


Hi jingyue,

I got a regression caused by this patch. What I see is that %SP is still used when references to automatic variables (allocas) are passed to functions, but %SP is no longer initialized to anything.

The following (broken) code is now being generated:

  .visible .entry __omptgt__0_db262_31_(
          .param .u64 __omptgt__0_db262_31__param_0,
          .param .u64 __omptgt__0_db262_31__param_1,
          .param .u64 __omptgt__0_db262_31__param_2,
          .param .u64 __omptgt__0_db262_31__param_3,
          .param .u64 __omptgt__0_db262_31__param_4
  )
  {
          .local .align 8 .b8     __local_depot0[56];
          .reg .b64       %SP;
          .reg .b64       %SPL;
          .reg .pred      %p<18>;
          .reg .s32       %r<31>;
          .reg .s64       %rd<30>;
  
          mov.u64         %SPL, __local_depot0;
  ...
            add.u64         %rd14, %SP, 36;
          // Callseq Start 1
          {
          .reg .b32 temp_param_reg;
          // <end>}
          .param .b64 param0;
          st.param.b64    [param0+0], %rd12;
          .param .b32 param1;
          st.param.b32    [param1+0], %r6;
          .param .b32 param2;
          st.param.b32    [param2+0], %r18;
          .param .b64 param3;
          st.param.b64    [param3+0], %rd13;
          .param .b64 param4;
          st.param.b64    [param4+0], %rd10;
          .param .b64 param5;
          st.param.b64    [param5+0], %rd8;
          .param .b64 param6;
          st.param.b64    [param6+0], %rd14;
          .param .b32 param7;
          st.param.b32    [param7+0], %r10;
          .param .b32 param8;
          st.param.b32    [param8+0], %r11;
          call.uni 
          __kmpc_for_static_init_4, 
          (
          param0, 
          param1, 
          param2, 
          param3, 
          param4, 
          param5, 
          param6, 
          param7, 
          param8
          );

`%rd14` is computed from `%SP`, but `%SP` never gets initialized. Before this patch, I used to get:

  .visible .entry __omptgt__0_db262_31_(
          .param .u64 __omptgt__0_db262_31__param_0,
          .param .u64 __omptgt__0_db262_31__param_1,
          .param .u64 __omptgt__0_db262_31__param_2,
          .param .u64 __omptgt__0_db262_31__param_3,
          .param .u64 __omptgt__0_db262_31__param_4
  )
  {
          .local .align 8 .b8     __local_depot0[56];
          .reg .b64       %SP;
          .reg .b64       %SPL;
          .reg .pred      %p<18>;
          .reg .s32       %r<31>;
          .reg .s64       %rd<31>;
  
          mov.u64         %rd30, __local_depot0;
          cvta.local.u64  %SP, %rd30;

In the older output, `%SP` is initialized properly (and `%SPL` is not used at all), so the references are properly generated. I suspect that the changes in this patch have to be reflected in other pieces of the backend. What do you think is the best way to tackle the problem?

Thanks,
Samuel


http://reviews.llvm.org/D10549

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/






More information about the llvm-commits mailing list