[PATCH] D119965: [LICM][PhaseOrder] Don't speculate in LICM until after running loop rotate

Guozhi Wei via Phabricator via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 25 12:18:47 PDT 2022


Carrot added a comment.

Before this patch, I had the following code snippet:

       │ 80:   mov      -0x58(%rsp),%rdx                                                                                                                                                      
       │       mov      -0x60(%rsp),%r9                                                                                                                                                       
       │       mov      -0x40(%rsp),%r15                                                                                                                                                      
  0.63 │ 8f:   mulps    %xmm8,%xmm9                                                                                                                                                           
  0.44 │       movups   (%rdx,%r9,4),%xmm1                                                                                                                                                    
  0.36 │       addps    %xmm9,%xmm1                                                                                                                                                           
  0.25 │       movups   %xmm1,(%rdx,%r9,4)                                                                                                                                                    
  0.75 │       mulps    %xmm8,%xmm3                                                                                                                                                           
  0.41 │       movups   (%rdx,%r10,4),%xmm1                                                                                                                                                   
  0.86 │       addps    %xmm3,%xmm1                                                                                                                                                           
  0.70 │       movups   %xmm1,(%rdx,%r10,4)                                                                                                                                                   
  0.43 │       add      $0x8,%r9                                                                                                                                                              
  0.28 │       add      -0x48(%rsp),%rbx                                                                                                                                                      
  0.07 │       cmp      %r15,%r9                                                                                                                                                              
  0.14 │     ↓ jge      3e5                                             
               ...

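After this patch, it is changed to the following; the two packed `mulps`/`addps` updates have become eight scalar `mulss`/`addss` updates: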

   0.33 │ 80:   mov      %r12,%rdx
   0.32 │       or       $0x1,%rdx                                                                                                                                                             
   0.32 │       mov      %r12,%rdi                                                                                                                                                             
   0.39 │       or       $0x2,%rdi                                                                                                                                                             
   0.38 │       mov      %r12,%rcx                                                                                                                                                             
   0.30 │       or       $0x3,%rcx                                                                                                                                                             
   0.37 │       mov      %r12,%rbp                                                                                                                                                             
   0.39 │       or       $0x4,%rbp                                                                                                                                                             
   0.31 │       mov      %r12,%r10                                                                                                                                                             
   0.27 │       or       $0x5,%r10                                                                                                                                                             
   0.31 │       mov      %r12,%r9                                                                                                                                                              
   0.37 │       or       $0x6,%r9                                                                                                                                                              
   0.29 │       mov      %r12,%r8                                                                                                                                                              
   0.35 │       or       $0x7,%r8                                                                                                                                                              
   0.34 │ b1:   mulss    %xmm8,%xmm13                                                                                                                                                          
   0.39 │       addss    (%r11,%r12,4),%xmm13                                                                                                                                                  
   0.33 │       movss    %xmm13,(%r11,%r12,4)                                                                                                                                                  
   0.33 │       mulss    %xmm8,%xmm12                                                                                                                                                          
   0.41 │       addss    (%r11,%rdx,4),%xmm12                                                                                                                                                  
   0.38 │       movss    %xmm12,(%r11,%rdx,4)                                                                                                                                                  
   0.31 │       mulss    %xmm8,%xmm3                                                                                                                                                           
   0.39 │       addss    (%r11,%rdi,4),%xmm3                                                                                                                                                   
   0.35 │       movss    %xmm3,(%r11,%rdi,4)                                                                                                                                                   
   0.41 │       mulss    %xmm8,%xmm4                                                                                                                                                           
   0.31 │       addss    (%r11,%rcx,4),%xmm4                                                                                                                                                   
   0.31 │       movss    %xmm4,(%r11,%rcx,4)                                                                                                                                                   
   0.34 │       mulss    %xmm8,%xmm5                                                                                                                                                           
   0.41 │       addss    (%r11,%rbp,4),%xmm5                                                                                                                                                   
   0.34 │       movss    %xmm5,(%r11,%rbp,4)                                                                                                                                                   
   0.32 │       mulss    %xmm8,%xmm6                                                                                                                                                           
   0.34 │       addss    (%r11,%r10,4),%xmm6                                                                                                                                                   
   0.38 │       movss    %xmm6,(%r11,%r10,4)                                                                                                                                                   
   0.35 │       mulss    %xmm8,%xmm7                                                                                                                                                           
   0.43 │       addss    (%r11,%r9,4),%xmm7                                                                                                                                                    
   0.38 │       movss    %xmm7,(%r11,%r9,4)                                                                                                                                                    
   0.41 │       mulss    %xmm8,%xmm1                                                                                                                                                           
   0.36 │       addss    (%r11,%r8,4),%xmm1                                                                                                                                                    
   0.32 │       movss    %xmm1,(%r11,%r8,4)                                                                                                                                                    
   0.39 │       add      $0x8,%r12                                                                                                                                                             
   0.39 │       add      -0x18(%rsp),%rbx                                                                                                                                                      
   0.02 │       cmp      -0x60(%rsp),%r12                                                                                                                                                      
   0.31 │     ↓ jge      510                                                                    
                ...


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D119965/new/

https://reviews.llvm.org/D119965



More information about the llvm-commits mailing list