[llvm] 7abe349 - [LSR] Improve filtered uses in NarrowSearchSpaceByPickingWinnerRegs

David Green via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 16 07:48:16 PST 2023


Author: David Green
Date: 2023-02-16T15:48:12Z
New Revision: 7abe3497e72af3ddee789dfc62c63a981a25dbf6

URL: https://github.com/llvm/llvm-project/commit/7abe3497e72af3ddee789dfc62c63a981a25dbf6
DIFF: https://github.com/llvm/llvm-project/commit/7abe3497e72af3ddee789dfc62c63a981a25dbf6.diff

LOG: [LSR] Improve filtered uses in NarrowSearchSpaceByPickingWinnerRegs

NarrowSearchSpaceByPickingWinnerRegs has an aggressive filtering method that
reduces the complexity of the search space by picking a best formula with
the highest number of reuses and assuming it will yield profitable reuse. In
certain cases we can find a best formula like {X+30,+,1} and later check a
formula like {X,+,1} with the same number of Uses. On some architectures it
can be better to pick {X,+,1}, especially if an offset of 30 can be used as a
legal addressing mode, but -30 cannot. That happens under Thumb1 code, which
has fairly limited addressing modes. This patch adds a check to see if it can
pick the simpler formula, if it looks more profitable.

Differential Revision: https://reviews.llvm.org/D144014

Added: 
    

Modified: 
    llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
    llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b9d67beda2f6b..f3d263cbde2e8 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4982,6 +4982,29 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
   LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
 }
 
+// Check if Best and Reg are SCEVs separated by a constant amount C, and if so
+// whether the addressing offset +C would be legal where the negative offset -C
+// is not.
+static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
+                                       ScalarEvolution &SE, const SCEV *Best,
+                                       const SCEV *Reg,
+                                       MemAccessTy AccessType) {
+  if (Best->getType() != Reg->getType())
+    return false;
+  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Best, Reg));
+  if (!Diff)
+    return false;
+
+  return TTI.isLegalAddressingMode(
+             AccessType.MemTy, /*BaseGV=*/nullptr,
+             /*BaseOffset=*/Diff->getAPInt().getSExtValue(),
+             /*HasBaseReg=*/false, /*Scale=*/0, AccessType.AddrSpace) &&
+         !TTI.isLegalAddressingMode(
+             AccessType.MemTy, /*BaseGV=*/nullptr,
+             /*BaseOffset=*/-Diff->getAPInt().getSExtValue(),
+             /*HasBaseReg=*/false, /*Scale=*/0, AccessType.AddrSpace);
+}
+
 /// Pick a register which seems likely to be profitable, and then in any use
 /// which has any reference to that register, delete all formulae which do not
 /// reference that register.
@@ -5010,6 +5033,19 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
           Best = Reg;
           BestNum = Count;
         }
+
+        // If the scores are the same, but the Reg is simpler for the target
+        // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
+        // handle +C but not -C), opt for the simpler formula.
+        if (Count == BestNum) {
+          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
+          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
+              IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
+                                         Uses[LUIdx].AccessTy)) {
+            Best = Reg;
+            BestNum = Count;
+          }
+        }
       }
     }
     assert(Best && "Failed to find best LSRUse candidate");

diff  --git a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
index 9feb7cc282528..fdfbf3393098e 100644
--- a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
+++ b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
@@ -418,9 +418,8 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
 ; CHECK-NEXT:    sub sp, #8
 ; CHECK-NEXT:    mov r7, r2
 ; CHECK-NEXT:    lsrs r3, r2, #2
-; CHECK-NEXT:    bne .LBB1_1
-; CHECK-NEXT:    b .LBB1_12
-; CHECK-NEXT:  .LBB1_1: @ %while.body.preheader
+; CHECK-NEXT:    beq .LBB1_6
+; CHECK-NEXT:  @ %bb.1: @ %while.body.preheader
 ; CHECK-NEXT:    movs r5, #3
 ; CHECK-NEXT:    ands r5, r3
 ; CHECK-NEXT:    subs r2, r3, #1
@@ -441,7 +440,7 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
 ; CHECK-NEXT:    str r6, [r1, #4]
 ; CHECK-NEXT:    subs r1, #8
 ; CHECK-NEXT:    cmp r5, #1
-; CHECK-NEXT:    bne .LBB1_5
+; CHECK-NEXT:    bne .LBB1_11
 ; CHECK-NEXT:  @ %bb.3:
 ; CHECK-NEXT:    adds r1, #16
 ; CHECK-NEXT:    adds r0, #8
@@ -450,64 +449,13 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
 ; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
 ; CHECK-NEXT:  .LBB1_4: @ %while.body.prol.loopexit
 ; CHECK-NEXT:    cmp r2, #3
-; CHECK-NEXT:    bhs .LBB1_9
-; CHECK-NEXT:    b .LBB1_12
-; CHECK-NEXT:  .LBB1_5: @ %while.body.prol.1
-; CHECK-NEXT:    ldrh r2, [r0, #8]
-; CHECK-NEXT:    ldrh r4, [r0, #10]
-; CHECK-NEXT:    ldrh r6, [r0, #12]
-; CHECK-NEXT:    ldrh r7, [r0, #14]
-; CHECK-NEXT:    lsls r7, r7, #16
-; CHECK-NEXT:    lsls r6, r6, #16
-; CHECK-NEXT:    lsls r4, r4, #16
-; CHECK-NEXT:    lsls r2, r2, #16
-; CHECK-NEXT:    str r2, [r1, #16]
-; CHECK-NEXT:    str r4, [r1, #20]
-; CHECK-NEXT:    str r6, [r1, #24]
-; CHECK-NEXT:    str r7, [r1, #28]
-; CHECK-NEXT:    cmp r5, #2
-; CHECK-NEXT:    bne .LBB1_7
-; CHECK-NEXT:  @ %bb.6:
-; CHECK-NEXT:    subs r3, r3, #2
-; CHECK-NEXT:    adds r1, #32
-; CHECK-NEXT:    adds r0, #16
-; CHECK-NEXT:    b .LBB1_8
-; CHECK-NEXT:  .LBB1_7: @ %while.body.prol.2
-; CHECK-NEXT:    ldrh r2, [r0, #16]
-; CHECK-NEXT:    ldrh r4, [r0, #18]
-; CHECK-NEXT:    ldrh r5, [r0, #20]
-; CHECK-NEXT:    ldrh r6, [r0, #22]
-; CHECK-NEXT:    lsls r6, r6, #16
-; CHECK-NEXT:    lsls r5, r5, #16
-; CHECK-NEXT:    lsls r4, r4, #16
-; CHECK-NEXT:    lsls r2, r2, #16
-; CHECK-NEXT:    mov r7, r1
-; CHECK-NEXT:    adds r7, #32
-; CHECK-NEXT:    stm r7!, {r2, r4, r5, r6}
-; CHECK-NEXT:    subs r3, r3, #3
-; CHECK-NEXT:    adds r1, #48
-; CHECK-NEXT:    adds r0, #24
-; CHECK-NEXT:  .LBB1_8: @ %while.body.prol.loopexit
-; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
-; CHECK-NEXT:    cmp r2, #3
-; CHECK-NEXT:    blo .LBB1_12
-; CHECK-NEXT:  .LBB1_9: @ %while.body.preheader1
-; CHECK-NEXT:    adds r0, #30
-; CHECK-NEXT:  .LBB1_10: @ %while.body
+; CHECK-NEXT:    blo .LBB1_6
+; CHECK-NEXT:  .LBB1_5: @ %while.body
 ; CHECK-NEXT:    @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT:    mov r2, r0
-; CHECK-NEXT:    subs r2, #30
-; CHECK-NEXT:    ldrh r2, [r2]
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    subs r4, #28
-; CHECK-NEXT:    ldrh r4, [r4]
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    subs r5, #26
-; CHECK-NEXT:    ldrh r5, [r5]
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    subs r6, #24
-; CHECK-NEXT:    ldrh r6, [r6]
+; CHECK-NEXT:    ldrh r2, [r0]
+; CHECK-NEXT:    ldrh r4, [r0, #2]
+; CHECK-NEXT:    ldrh r5, [r0, #4]
+; CHECK-NEXT:    ldrh r6, [r0, #6]
 ; CHECK-NEXT:    lsls r6, r6, #16
 ; CHECK-NEXT:    str r6, [r1, #12]
 ; CHECK-NEXT:    lsls r5, r5, #16
@@ -516,18 +464,10 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
 ; CHECK-NEXT:    str r4, [r1, #4]
 ; CHECK-NEXT:    lsls r2, r2, #16
 ; CHECK-NEXT:    str r2, [r1]
-; CHECK-NEXT:    mov r2, r0
-; CHECK-NEXT:    subs r2, #22
-; CHECK-NEXT:    ldrh r2, [r2]
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    subs r4, #20
-; CHECK-NEXT:    ldrh r4, [r4]
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    subs r5, #18
-; CHECK-NEXT:    ldrh r5, [r5]
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    subs r6, #16
-; CHECK-NEXT:    ldrh r6, [r6]
+; CHECK-NEXT:    ldrh r2, [r0, #8]
+; CHECK-NEXT:    ldrh r4, [r0, #10]
+; CHECK-NEXT:    ldrh r5, [r0, #12]
+; CHECK-NEXT:    ldrh r6, [r0, #14]
 ; CHECK-NEXT:    lsls r6, r6, #16
 ; CHECK-NEXT:    str r6, [r1, #28]
 ; CHECK-NEXT:    lsls r5, r5, #16
@@ -536,18 +476,10 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
 ; CHECK-NEXT:    str r4, [r1, #20]
 ; CHECK-NEXT:    lsls r2, r2, #16
 ; CHECK-NEXT:    str r2, [r1, #16]
-; CHECK-NEXT:    mov r2, r0
-; CHECK-NEXT:    subs r2, #14
-; CHECK-NEXT:    ldrh r2, [r2]
-; CHECK-NEXT:    mov r4, r0
-; CHECK-NEXT:    subs r4, #12
-; CHECK-NEXT:    ldrh r4, [r4]
-; CHECK-NEXT:    mov r5, r0
-; CHECK-NEXT:    subs r5, #10
-; CHECK-NEXT:    ldrh r5, [r5]
-; CHECK-NEXT:    mov r6, r0
-; CHECK-NEXT:    subs r6, #8
-; CHECK-NEXT:    ldrh r6, [r6]
+; CHECK-NEXT:    ldrh r2, [r0, #16]
+; CHECK-NEXT:    ldrh r4, [r0, #18]
+; CHECK-NEXT:    ldrh r5, [r0, #20]
+; CHECK-NEXT:    ldrh r6, [r0, #22]
 ; CHECK-NEXT:    lsls r6, r6, #16
 ; CHECK-NEXT:    str r6, [r1, #44]
 ; CHECK-NEXT:    lsls r5, r5, #16
@@ -556,13 +488,10 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
 ; CHECK-NEXT:    str r4, [r1, #36]
 ; CHECK-NEXT:    lsls r2, r2, #16
 ; CHECK-NEXT:    str r2, [r1, #32]
-; CHECK-NEXT:    subs r2, r0, #6
-; CHECK-NEXT:    ldrh r2, [r2]
-; CHECK-NEXT:    subs r4, r0, #4
-; CHECK-NEXT:    ldrh r4, [r4]
-; CHECK-NEXT:    subs r5, r0, #2
-; CHECK-NEXT:    ldrh r5, [r5]
-; CHECK-NEXT:    ldrh r6, [r0]
+; CHECK-NEXT:    ldrh r2, [r0, #24]
+; CHECK-NEXT:    ldrh r4, [r0, #26]
+; CHECK-NEXT:    ldrh r5, [r0, #28]
+; CHECK-NEXT:    ldrh r6, [r0, #30]
 ; CHECK-NEXT:    lsls r6, r6, #16
 ; CHECK-NEXT:    str r6, [r1, #60]
 ; CHECK-NEXT:    lsls r5, r5, #16
@@ -572,34 +501,74 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
 ; CHECK-NEXT:    lsls r2, r2, #16
 ; CHECK-NEXT:    str r2, [r1, #48]
 ; CHECK-NEXT:    adds r1, #64
-; CHECK-NEXT:    adds r0, #32
 ; CHECK-NEXT:    subs r3, r3, #4
-; CHECK-NEXT:    bne .LBB1_10
-; CHECK-NEXT:  @ %bb.11: @ %while.end.loopexit
-; CHECK-NEXT:    subs r0, #30
-; CHECK-NEXT:  .LBB1_12: @ %while.end
+; CHECK-NEXT:    adds r0, #32
+; CHECK-NEXT:    cmp r3, #0
+; CHECK-NEXT:    bne .LBB1_5
+; CHECK-NEXT:  .LBB1_6: @ %while.end
 ; CHECK-NEXT:    movs r2, #3
 ; CHECK-NEXT:    ands r7, r2
-; CHECK-NEXT:    beq .LBB1_16
-; CHECK-NEXT:  @ %bb.13: @ %while.body12
+; CHECK-NEXT:    beq .LBB1_10
+; CHECK-NEXT:  @ %bb.7: @ %while.body12
 ; CHECK-NEXT:    ldrh r2, [r0]
 ; CHECK-NEXT:    lsls r2, r2, #16
 ; CHECK-NEXT:    str r2, [r1]
 ; CHECK-NEXT:    cmp r7, #1
-; CHECK-NEXT:    beq .LBB1_16
-; CHECK-NEXT:  @ %bb.14: @ %while.body12.1
+; CHECK-NEXT:    beq .LBB1_10
+; CHECK-NEXT:  @ %bb.8: @ %while.body12.1
 ; CHECK-NEXT:    ldrh r2, [r0, #2]
 ; CHECK-NEXT:    lsls r2, r2, #16
 ; CHECK-NEXT:    str r2, [r1, #4]
 ; CHECK-NEXT:    cmp r7, #2
-; CHECK-NEXT:    beq .LBB1_16
-; CHECK-NEXT:  @ %bb.15: @ %while.body12.2
+; CHECK-NEXT:    beq .LBB1_10
+; CHECK-NEXT:  @ %bb.9: @ %while.body12.2
 ; CHECK-NEXT:    ldrh r0, [r0, #4]
 ; CHECK-NEXT:    lsls r0, r0, #16
 ; CHECK-NEXT:    str r0, [r1, #8]
-; CHECK-NEXT:  .LBB1_16: @ %while.end17
+; CHECK-NEXT:  .LBB1_10: @ %while.end17
 ; CHECK-NEXT:    add sp, #8
 ; CHECK-NEXT:    pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT:  .LBB1_11: @ %while.body.prol.1
+; CHECK-NEXT:    ldrh r2, [r0, #8]
+; CHECK-NEXT:    ldrh r4, [r0, #10]
+; CHECK-NEXT:    ldrh r6, [r0, #12]
+; CHECK-NEXT:    ldrh r7, [r0, #14]
+; CHECK-NEXT:    lsls r7, r7, #16
+; CHECK-NEXT:    lsls r6, r6, #16
+; CHECK-NEXT:    lsls r4, r4, #16
+; CHECK-NEXT:    lsls r2, r2, #16
+; CHECK-NEXT:    str r2, [r1, #16]
+; CHECK-NEXT:    str r4, [r1, #20]
+; CHECK-NEXT:    str r6, [r1, #24]
+; CHECK-NEXT:    str r7, [r1, #28]
+; CHECK-NEXT:    cmp r5, #2
+; CHECK-NEXT:    bne .LBB1_13
+; CHECK-NEXT:  @ %bb.12:
+; CHECK-NEXT:    subs r3, r3, #2
+; CHECK-NEXT:    adds r1, #32
+; CHECK-NEXT:    adds r0, #16
+; CHECK-NEXT:    b .LBB1_14
+; CHECK-NEXT:  .LBB1_13: @ %while.body.prol.2
+; CHECK-NEXT:    ldrh r2, [r0, #16]
+; CHECK-NEXT:    ldrh r4, [r0, #18]
+; CHECK-NEXT:    ldrh r5, [r0, #20]
+; CHECK-NEXT:    ldrh r6, [r0, #22]
+; CHECK-NEXT:    lsls r6, r6, #16
+; CHECK-NEXT:    lsls r5, r5, #16
+; CHECK-NEXT:    lsls r4, r4, #16
+; CHECK-NEXT:    lsls r2, r2, #16
+; CHECK-NEXT:    mov r7, r1
+; CHECK-NEXT:    adds r7, #32
+; CHECK-NEXT:    stm r7!, {r2, r4, r5, r6}
+; CHECK-NEXT:    subs r3, r3, #3
+; CHECK-NEXT:    adds r1, #48
+; CHECK-NEXT:    adds r0, #24
+; CHECK-NEXT:  .LBB1_14: @ %while.body.prol.loopexit
+; CHECK-NEXT:    ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT:    ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT:    cmp r2, #3
+; CHECK-NEXT:    bhs .LBB1_5
+; CHECK-NEXT:    b .LBB1_6
 entry:
   %cmp.not18 = icmp ult i32 %blockSize, 4
   br i1 %cmp.not18, label %while.end, label %while.body.preheader


        


More information about the llvm-commits mailing list