[PATCH] Add NVPTXPeephole pass to reduce unnecessary address cast

Jingyue Wu jingyue at google.com
Tue Jun 30 10:03:09 PDT 2015


Looks like a bug to me. We'll figure it out. Thank you!

On Mon, Jun 29, 2015 at 9:17 PM, Samuel Antao <sfantao at us.ibm.com> wrote:

> Hi jingyue,
>
> I got a regression caused by this patch. What I see is that %SP is still
> used if references to automatic vars (alloca) are passed to functions but
> %SP is no longer initialized to anything.
>
> This (broken) code is now being generated:
>
>   .visible .entry __omptgt__0_db262_31_(
>           .param .u64 __omptgt__0_db262_31__param_0,
>           .param .u64 __omptgt__0_db262_31__param_1,
>           .param .u64 __omptgt__0_db262_31__param_2,
>           .param .u64 __omptgt__0_db262_31__param_3,
>           .param .u64 __omptgt__0_db262_31__param_4
>   )
>   {
>           .local .align 8 .b8     __local_depot0[56];
>           .reg .b64       %SP;
>           .reg .b64       %SPL;
>           .reg .pred      %p<18>;
>           .reg .s32       %r<31>;
>           .reg .s64       %rd<30>;
>
>           mov.u64         %SPL, __local_depot0;
>   ...
>             add.u64         %rd14, %SP, 36;
>           // Callseq Start 1
>           {
>           .reg .b32 temp_param_reg;
>           // <end>}
>           .param .b64 param0;
>           st.param.b64    [param0+0], %rd12;
>           .param .b32 param1;
>           st.param.b32    [param1+0], %r6;
>           .param .b32 param2;
>           st.param.b32    [param2+0], %r18;
>           .param .b64 param3;
>           st.param.b64    [param3+0], %rd13;
>           .param .b64 param4;
>           st.param.b64    [param4+0], %rd10;
>           .param .b64 param5;
>           st.param.b64    [param5+0], %rd8;
>           .param .b64 param6;
>           st.param.b64    [param6+0], %rd14;
>           .param .b32 param7;
>           st.param.b32    [param7+0], %r10;
>           .param .b32 param8;
>           st.param.b32    [param8+0], %r11;
>           call.uni
>           __kmpc_for_static_init_4,
>           (
>           param0,
>           param1,
>           param2,
>           param3,
>           param4,
>           param5,
>           param6,
>           param7,
>           param8
>           );
>
> `%rd14` is computed from %SP, but %SP never gets initialized. Before this
> patch, I used to get:
>
>   .visible .entry __omptgt__0_db262_31_(
>           .param .u64 __omptgt__0_db262_31__param_0,
>           .param .u64 __omptgt__0_db262_31__param_1,
>           .param .u64 __omptgt__0_db262_31__param_2,
>           .param .u64 __omptgt__0_db262_31__param_3,
>           .param .u64 __omptgt__0_db262_31__param_4
>   )
>   {
>           .local .align 8 .b8     __local_depot0[56];
>           .reg .b64       %SP;
>           .reg .b64       %SPL;
>           .reg .pred      %p<18>;
>           .reg .s32       %r<31>;
>           .reg .s64       %rd<31>;
>
>           mov.u64         %rd30, __local_depot0;
>           cvta.local.u64  %SP, %rd30;
>
> `%SP` is initialized properly (and `%SPL` is not used at all) so the
> references are properly generated. I suspect that the changes in this patch
> have to be reflected in other pieces of the backend. What do you think is
> the best way to tackle the problem?
>
> Thanks,
> Samuel
>
>
> http://reviews.llvm.org/D10549
>
> EMAIL PREFERENCES
>   http://reviews.llvm.org/settings/panel/emailpreferences/
>
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150630/ef2863c3/attachment.html>


More information about the llvm-commits mailing list