[llvm] 146d44c - [LSR] Don't require register reuse under postinc

David Green via llvm-commits llvm-commits at lists.llvm.org
Tue May 5 08:05:09 PDT 2020


Author: David Green
Date: 2020-05-05T16:04:50+01:00
New Revision: 146d44c2511ab4d87f1d9c45dce56ad71427deab

URL: https://github.com/llvm/llvm-project/commit/146d44c2511ab4d87f1d9c45dce56ad71427deab
DIFF: https://github.com/llvm/llvm-project/commit/146d44c2511ab4d87f1d9c45dce56ad71427deab.diff

LOG: [LSR] Don't require register reuse under postinc

LSR has some logic that tries to aggressively reuse registers in
formulae. This can lead to sub-optimal decisions in complex loops where
the backend is trying to use shouldFavorPostInc. This disables the
re-use in those situations.

Differential Revision: https://reviews.llvm.org/D79301

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 36f8c68ef496..652ff6bfb6d8 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4911,19 +4911,24 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
     // Ignore formulae which may not be ideal in terms of register reuse of
     // ReqRegs.  The formula should use all required registers before
     // introducing new ones.
-    int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
-    for (const SCEV *Reg : ReqRegs) {
-      if ((F.ScaledReg && F.ScaledReg == Reg) ||
-          is_contained(F.BaseRegs, Reg)) {
-        --NumReqRegsToFind;
-        if (NumReqRegsToFind == 0)
-          break;
+    // This can sometimes (notably when trying to favour postinc) lead to
+    // sub-optimial decisions. There it is best left to the cost modelling to
+    // get correct.
+    if (!TTI.shouldFavorPostInc() || LU.Kind != LSRUse::Address) {
+      int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
+      for (const SCEV *Reg : ReqRegs) {
+        if ((F.ScaledReg && F.ScaledReg == Reg) ||
+            is_contained(F.BaseRegs, Reg)) {
+          --NumReqRegsToFind;
+          if (NumReqRegsToFind == 0)
+            break;
+        }
+      }
+      if (NumReqRegsToFind != 0) {
+        // If none of the formulae satisfied the required registers, then we could
+        // clear ReqRegs and try again. Currently, we simply give up in this case.
+        continue;
       }
-    }
-    if (NumReqRegsToFind != 0) {
-      // If none of the formulae satisfied the required registers, then we could
-      // clear ReqRegs and try again. Currently, we simply give up in this case.
-      continue;
     }
 
     // Evaluate the cost of the current formula. If it's already worse than

diff  --git a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
index f5a165bf4df3..8f01326c002f 100644
--- a/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-postinc-lsr.ll
@@ -1059,39 +1059,36 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
 ; CHECK-NEXT:    sub sp, #4
 ; CHECK-NEXT:    .vsave {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    vpush {d8, d9, d10, d11, d12, d13, d14, d15}
-; CHECK-NEXT:    .pad #72
-; CHECK-NEXT:    sub sp, #72
+; CHECK-NEXT:    .pad #56
+; CHECK-NEXT:    sub sp, #56
 ; CHECK-NEXT:    cmp r2, #8
-; CHECK-NEXT:    strd r0, r1, [sp, #28] @ 8-byte Folded Spill
-; CHECK-NEXT:    vstr s0, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
+; CHECK-NEXT:    vstr s0, [sp, #4] @ 4-byte Spill
 ; CHECK-NEXT:    mov r1, r2
-; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #8] @ 4-byte Spill
 ; CHECK-NEXT:    blo.w .LBB7_9
 ; CHECK-NEXT:  @ %bb.1:
-; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
-; CHECK-NEXT:    movs r6, #1
-; CHECK-NEXT:    str r2, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    ldr r2, [sp, #8] @ 4-byte Reload
+; CHECK-NEXT:    movs r3, #1
+; CHECK-NEXT:    mov.w r10, #0
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    lsrs r1, r2, #2
-; CHECK-NEXT:    movs r2, #0
-; CHECK-NEXT:    str r2, [sp, #48] @ 4-byte Spill
 ; CHECK-NEXT:    b .LBB7_3
 ; CHECK-NEXT:  .LBB7_2: @ in Loop: Header=BB7_3 Depth=1
-; CHECK-NEXT:    ldr r3, [sp, #20] @ 4-byte Reload
-; CHECK-NEXT:    lsls r6, r6, #2
-; CHECK-NEXT:    ldr r2, [sp, #48] @ 4-byte Reload
-; CHECK-NEXT:    cmp r3, #7
-; CHECK-NEXT:    asr.w r1, r3, #2
-; CHECK-NEXT:    add.w r2, r2, #1
-; CHECK-NEXT:    str r2, [sp, #48] @ 4-byte Spill
+; CHECK-NEXT:    ldr r2, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    add.w r10, r10, #1
+; CHECK-NEXT:    lsls r3, r3, #2
+; CHECK-NEXT:    cmp r2, #7
+; CHECK-NEXT:    asr.w r1, r2, #2
 ; CHECK-NEXT:    ble .LBB7_9
 ; CHECK-NEXT:  .LBB7_3: @ =>This Loop Header: Depth=1
 ; CHECK-NEXT:    @ Child Loop BB7_6 Depth 2
 ; CHECK-NEXT:    @ Child Loop BB7_7 Depth 3
-; CHECK-NEXT:    str r1, [sp, #20] @ 4-byte Spill
-; CHECK-NEXT:    cmp r6, #1
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    str r1, [sp, #12] @ 4-byte Spill
+; CHECK-NEXT:    cmp r3, #1
+; CHECK-NEXT:    ldr r1, [sp, #16] @ 4-byte Reload
 ; CHECK-NEXT:    lsr.w r2, r1, #2
-; CHECK-NEXT:    str r2, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    str r2, [sp, #16] @ 4-byte Spill
 ; CHECK-NEXT:    blt .LBB7_2
 ; CHECK-NEXT:  @ %bb.4: @ in Loop: Header=BB7_3 Depth=1
 ; CHECK-NEXT:    movs r2, #0
@@ -1100,86 +1097,81 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
 ; CHECK-NEXT:  @ %bb.5: @ %.preheader
 ; CHECK-NEXT:    @ in Loop: Header=BB7_3 Depth=1
 ; CHECK-NEXT:    lsrs r2, r1, #3
-; CHECK-NEXT:    lsls r0, r1, #1
-; CHECK-NEXT:    ldr r1, [sp, #24] @ 4-byte Reload
-; CHECK-NEXT:    mov.w r10, #0
-; CHECK-NEXT:    str r2, [sp, #40] @ 4-byte Spill
-; CHECK-NEXT:    str r0, [sp, #36] @ 4-byte Spill
-; CHECK-NEXT:    lsls r5, r1, #3
-; CHECK-NEXT:    lsls r7, r1, #4
-; CHECK-NEXT:    add.w r1, r1, r1, lsl #1
-; CHECK-NEXT:    str r6, [sp, #44] @ 4-byte Spill
-; CHECK-NEXT:    lsls r3, r1, #3
+; CHECK-NEXT:    lsls r1, r1, #1
+; CHECK-NEXT:    str r2, [sp, #28] @ 4-byte Spill
+; CHECK-NEXT:    movs r5, #0
+; CHECK-NEXT:    ldr r2, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    str r1, [sp, #24] @ 4-byte Spill
+; CHECK-NEXT:    str r3, [sp, #32] @ 4-byte Spill
+; CHECK-NEXT:    lsl.w r11, r2, #1
 ; CHECK-NEXT:  .LBB7_6: @ Parent Loop BB7_3 Depth=1
 ; CHECK-NEXT:    @ => This Loop Header: Depth=2
 ; CHECK-NEXT:    @ Child Loop BB7_7 Depth 3
-; CHECK-NEXT:    ldr r1, [sp, #28] @ 4-byte Reload
-; CHECK-NEXT:    ldr r6, [sp, #48] @ 4-byte Reload
-; CHECK-NEXT:    ldrd r4, r0, [r1, #24]
-; CHECK-NEXT:    ldrd r12, r2, [r1, #16]
-; CHECK-NEXT:    ldrd r8, r9, [r1, #32]
-; CHECK-NEXT:    ldr r1, [sp, #36] @ 4-byte Reload
-; CHECK-NEXT:    ldr.w lr, [sp, #40] @ 4-byte Reload
-; CHECK-NEXT:    ldr.w r4, [r4, r6, lsl #2]
-; CHECK-NEXT:    mul r1, r1, r10
-; CHECK-NEXT:    ldr.w r11, [r2, r6, lsl #2]
-; CHECK-NEXT:    ldr.w r6, [r12, r6, lsl #2]
+; CHECK-NEXT:    add.w r12, r0, #16
+; CHECK-NEXT:    ldr r4, [sp, #24] @ 4-byte Reload
+; CHECK-NEXT:    ldr.w lr, [sp, #28] @ 4-byte Reload
+; CHECK-NEXT:    ldm.w r12, {r1, r2, r3, r12}
+; CHECK-NEXT:    muls r4, r5, r4
+; CHECK-NEXT:    ldr.w r2, [r2, r10, lsl #2]
+; CHECK-NEXT:    ldr.w r1, [r1, r10, lsl #2]
+; CHECK-NEXT:    ldrd r6, r7, [r0, #32]
+; CHECK-NEXT:    ldr.w r3, [r3, r10, lsl #2]
 ; CHECK-NEXT:    dls lr, lr
-; CHECK-NEXT:    ldr r2, [sp, #32] @ 4-byte Reload
-; CHECK-NEXT:    add.w r12, r2, r1, lsl #2
-; CHECK-NEXT:    add.w r2, r9, r4, lsl #2
+; CHECK-NEXT:    add.w r6, r6, r2, lsl #2
+; CHECK-NEXT:    add.w r12, r12, r1, lsl #2
+; CHECK-NEXT:    ldr r1, [sp, #20] @ 4-byte Reload
+; CHECK-NEXT:    add.w r2, r1, r4, lsl #2
+; CHECK-NEXT:    add.w r3, r7, r3, lsl #2
+; CHECK-NEXT:    add.w r1, r2, r11, lsl #2
+; CHECK-NEXT:    add.w r8, r1, r11, lsl #2
 ; CHECK-NEXT:    add.w r9, r8, r11, lsl #2
-; CHECK-NEXT:    add.w r8, r0, r6, lsl #2
 ; CHECK-NEXT:  .LBB7_7: @ Parent Loop BB7_3 Depth=1
 ; CHECK-NEXT:    @ Parent Loop BB7_6 Depth=2
 ; CHECK-NEXT:    @ => This Inner Loop Header: Depth=3
-; CHECK-NEXT:    add.w r4, r12, r7
-; CHECK-NEXT:    add.w r1, r12, r3
-; CHECK-NEXT:    add.w r6, r12, r5
-; CHECK-NEXT:    vldrw.u32 q3, [r12]
-; CHECK-NEXT:    vldrw.u32 q7, [r4]
+; CHECK-NEXT:    vldrw.u32 q3, [r9]
 ; CHECK-NEXT:    vldrw.u32 q4, [r1]
-; CHECK-NEXT:    vldrw.u32 q5, [r6]
-; CHECK-NEXT:    vsub.f32 q0, q3, q7
-; CHECK-NEXT:    vadd.f32 q3, q7, q3
-; CHECK-NEXT:    vsub.f32 q6, q5, q4
-; CHECK-NEXT:    vcadd.f32 q1, q0, q6, #270
-; CHECK-NEXT:    vcadd.f32 q2, q0, q6, #90
-; CHECK-NEXT:    vadd.f32 q0, q5, q4
+; CHECK-NEXT:    vldrw.u32 q6, [r8]
+; CHECK-NEXT:    vldrw.u32 q7, [r2]
+; CHECK-NEXT:    vsub.f32 q5, q4, q3
+; CHECK-NEXT:    vsub.f32 q0, q7, q6
+; CHECK-NEXT:    vcadd.f32 q1, q0, q5, #270
+; CHECK-NEXT:    vcadd.f32 q2, q0, q5, #90
+; CHECK-NEXT:    vadd.f32 q0, q4, q3
+; CHECK-NEXT:    vadd.f32 q3, q6, q7
 ; CHECK-NEXT:    vsub.f32 q4, q3, q0
 ; CHECK-NEXT:    vadd.f32 q0, q3, q0
-; CHECK-NEXT:    vstrb.8 q0, [r12], #16
-; CHECK-NEXT:    vldrw.u32 q0, [r9], #16
+; CHECK-NEXT:    vstrb.8 q0, [r2], #16
+; CHECK-NEXT:    vldrw.u32 q0, [r6], #16
 ; CHECK-NEXT:    vcmul.f32 q3, q0, q4, #0
 ; CHECK-NEXT:    vcmla.f32 q3, q0, q4, #90
-; CHECK-NEXT:    vstrw.32 q3, [r6]
-; CHECK-NEXT:    vldrw.u32 q0, [r8], #16
+; CHECK-NEXT:    vstrb.8 q3, [r1], #16
+; CHECK-NEXT:    vldrw.u32 q0, [r12], #16
 ; CHECK-NEXT:    vcmul.f32 q3, q0, q2, #0
 ; CHECK-NEXT:    vcmla.f32 q3, q0, q2, #90
-; CHECK-NEXT:    vstrw.32 q3, [r4]
-; CHECK-NEXT:    vldrw.u32 q0, [r2], #16
+; CHECK-NEXT:    vstrb.8 q3, [r8], #16
+; CHECK-NEXT:    vldrw.u32 q0, [r3], #16
 ; CHECK-NEXT:    vcmul.f32 q2, q0, q1, #0
 ; CHECK-NEXT:    vcmla.f32 q2, q0, q1, #90
-; CHECK-NEXT:    vstrw.32 q2, [r1]
+; CHECK-NEXT:    vstrb.8 q2, [r9], #16
 ; CHECK-NEXT:    le lr, .LBB7_7
 ; CHECK-NEXT:  @ %bb.8: @ in Loop: Header=BB7_6 Depth=2
-; CHECK-NEXT:    ldr r6, [sp, #44] @ 4-byte Reload
-; CHECK-NEXT:    add.w r10, r10, #1
-; CHECK-NEXT:    cmp r10, r6
+; CHECK-NEXT:    ldr r3, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    adds r5, #1
+; CHECK-NEXT:    cmp r5, r3
 ; CHECK-NEXT:    bne .LBB7_6
 ; CHECK-NEXT:    b .LBB7_2
 ; CHECK-NEXT:  .LBB7_9:
 ; CHECK-NEXT:    adr r0, .LCPI7_0
 ; CHECK-NEXT:    vldrw.u32 q1, [r0]
-; CHECK-NEXT:    ldr r0, [sp, #32] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #20] @ 4-byte Reload
 ; CHECK-NEXT:    vadd.i32 q1, q1, r0
 ; CHECK-NEXT:    vldrw.u32 q2, [q1, #64]!
-; CHECK-NEXT:    ldr r0, [sp, #16] @ 4-byte Reload
+; CHECK-NEXT:    ldr r0, [sp, #8] @ 4-byte Reload
 ; CHECK-NEXT:    lsr.w lr, r0, #3
 ; CHECK-NEXT:    wls lr, lr, .LBB7_12
 ; CHECK-NEXT:  @ %bb.10:
 ; CHECK-NEXT:    vldrw.u32 q3, [q1, #16]
-; CHECK-NEXT:    vldr s0, [sp, #12] @ 4-byte Reload
+; CHECK-NEXT:    vldr s0, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:    vmov r0, s0
 ; CHECK-NEXT:  .LBB7_11: @ =>This Inner Loop Header: Depth=1
 ; CHECK-NEXT:    vldrw.u32 q0, [q1, #24]
@@ -1190,14 +1182,14 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
 ; CHECK-NEXT:    vsub.f32 q0, q4, q0
 ; CHECK-NEXT:    vsub.f32 q7, q6, q5
 ; CHECK-NEXT:    vcadd.f32 q4, q2, q0, #270
-; CHECK-NEXT:    vstrw.32 q7, [sp, #48] @ 16-byte Spill
+; CHECK-NEXT:    vstrw.32 q7, [sp, #32] @ 16-byte Spill
 ; CHECK-NEXT:    vcadd.f32 q7, q2, q0, #90
 ; CHECK-NEXT:    vadd.f32 q0, q6, q5
 ; CHECK-NEXT:    vldrw.u32 q2, [q1, #64]!
 ; CHECK-NEXT:    vmul.f32 q0, q0, r0
 ; CHECK-NEXT:    vldrw.u32 q3, [q1, #16]
 ; CHECK-NEXT:    vstrw.32 q0, [q1, #-64]
-; CHECK-NEXT:    vldrw.u32 q5, [sp, #48] @ 16-byte Reload
+; CHECK-NEXT:    vldrw.u32 q5, [sp, #32] @ 16-byte Reload
 ; CHECK-NEXT:    vmul.f32 q0, q4, r0
 ; CHECK-NEXT:    vmul.f32 q4, q7, r0
 ; CHECK-NEXT:    vmul.f32 q5, q5, r0
@@ -1206,7 +1198,7 @@ define arm_aapcs_vfpcc void @_Z37_arm_radix4_butterfly_inverse_f32_mvePK21arm_cf
 ; CHECK-NEXT:    vstrw.32 q0, [q1, #-40]
 ; CHECK-NEXT:    le lr, .LBB7_11
 ; CHECK-NEXT:  .LBB7_12:
-; CHECK-NEXT:    add sp, #72
+; CHECK-NEXT:    add sp, #56
 ; CHECK-NEXT:    vpop {d8, d9, d10, d11, d12, d13, d14, d15}
 ; CHECK-NEXT:    add sp, #4
 ; CHECK-NEXT:    pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}


        


More information about the llvm-commits mailing list