[llvm] 7abe349 - [LSR] Improve filtered uses in NarrowSearchSpaceByPickingWinnerRegs
David Green via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 16 07:48:16 PST 2023
Author: David Green
Date: 2023-02-16T15:48:12Z
New Revision: 7abe3497e72af3ddee789dfc62c63a981a25dbf6
URL: https://github.com/llvm/llvm-project/commit/7abe3497e72af3ddee789dfc62c63a981a25dbf6
DIFF: https://github.com/llvm/llvm-project/commit/7abe3497e72af3ddee789dfc62c63a981a25dbf6.diff
LOG: [LSR] Improve filtered uses in NarrowSearchSpaceByPickingWinnerRegs
NarrowSearchSpaceByPickingWinnerRegs has an aggressive filtering method to
reduce the complexity of the search space by picking a best formula with
the highest number of reuses and assuming it will yield profitable reuse. In
certain cases we can find a best formula like {X+30,+,1} and later check a
formula like {X,+,1} with the same number of Uses. On some architectures it
can be better to pick {X,+,1}, especially if an offset of 30 can be used as a
legal addressing mode, but -30 cannot. That happens under Thumb1 code, which
has fairly limited addressing modes. This patch adds a check to see if it can
pick the simpler formula, if it looks more profitable.
Differential Revision: https://reviews.llvm.org/D144014
Added:
Modified:
llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index b9d67beda2f6b..f3d263cbde2e8 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -4982,6 +4982,29 @@ void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
+// Check if Best and Reg are SCEVs separated by a constant C = Best - Reg, and
+// if so, whether the addressing offset +C would be legal where the negative
+// offset -C is not.
+static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
+ ScalarEvolution &SE, const SCEV *Best,
+ const SCEV *Reg,
+ MemAccessTy AccessType) {
+ if (Best->getType() != Reg->getType())
+ return false;
+ const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Best, Reg));
+ if (!Diff)
+ return false;
+
+ return TTI.isLegalAddressingMode(
+ AccessType.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/Diff->getAPInt().getSExtValue(),
+ /*HasBaseReg=*/false, /*Scale=*/0, AccessType.AddrSpace) &&
+ !TTI.isLegalAddressingMode(
+ AccessType.MemTy, /*BaseGV=*/nullptr,
+ /*BaseOffset=*/-Diff->getAPInt().getSExtValue(),
+ /*HasBaseReg=*/false, /*Scale=*/0, AccessType.AddrSpace);
+}
+
/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
@@ -5010,6 +5033,19 @@ void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
Best = Reg;
BestNum = Count;
}
+
+ // If the scores are the same, but the Reg is simpler for the target
+ // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
+ // handle +C but not -C), opt for the simpler formula.
+ if (Count == BestNum) {
+ int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
+ if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
+ IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
+ Uses[LUIdx].AccessTy)) {
+ Best = Reg;
+ BestNum = Count;
+ }
+ }
}
}
assert(Best && "Failed to find best LSRUse candidate");
diff --git a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
index 9feb7cc282528..fdfbf3393098e 100644
--- a/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
+++ b/llvm/test/CodeGen/Thumb/arm_q15_to_q31.ll
@@ -418,9 +418,8 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: sub sp, #8
; CHECK-NEXT: mov r7, r2
; CHECK-NEXT: lsrs r3, r2, #2
-; CHECK-NEXT: bne .LBB1_1
-; CHECK-NEXT: b .LBB1_12
-; CHECK-NEXT: .LBB1_1: @ %while.body.preheader
+; CHECK-NEXT: beq .LBB1_6
+; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
; CHECK-NEXT: movs r5, #3
; CHECK-NEXT: ands r5, r3
; CHECK-NEXT: subs r2, r3, #1
@@ -441,7 +440,7 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: str r6, [r1, #4]
; CHECK-NEXT: subs r1, #8
; CHECK-NEXT: cmp r5, #1
-; CHECK-NEXT: bne .LBB1_5
+; CHECK-NEXT: bne .LBB1_11
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: adds r1, #16
; CHECK-NEXT: adds r0, #8
@@ -450,64 +449,13 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
; CHECK-NEXT: .LBB1_4: @ %while.body.prol.loopexit
; CHECK-NEXT: cmp r2, #3
-; CHECK-NEXT: bhs .LBB1_9
-; CHECK-NEXT: b .LBB1_12
-; CHECK-NEXT: .LBB1_5: @ %while.body.prol.1
-; CHECK-NEXT: ldrh r2, [r0, #8]
-; CHECK-NEXT: ldrh r4, [r0, #10]
-; CHECK-NEXT: ldrh r6, [r0, #12]
-; CHECK-NEXT: ldrh r7, [r0, #14]
-; CHECK-NEXT: lsls r7, r7, #16
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: str r2, [r1, #16]
-; CHECK-NEXT: str r4, [r1, #20]
-; CHECK-NEXT: str r6, [r1, #24]
-; CHECK-NEXT: str r7, [r1, #28]
-; CHECK-NEXT: cmp r5, #2
-; CHECK-NEXT: bne .LBB1_7
-; CHECK-NEXT: @ %bb.6:
-; CHECK-NEXT: subs r3, r3, #2
-; CHECK-NEXT: adds r1, #32
-; CHECK-NEXT: adds r0, #16
-; CHECK-NEXT: b .LBB1_8
-; CHECK-NEXT: .LBB1_7: @ %while.body.prol.2
-; CHECK-NEXT: ldrh r2, [r0, #16]
-; CHECK-NEXT: ldrh r4, [r0, #18]
-; CHECK-NEXT: ldrh r5, [r0, #20]
-; CHECK-NEXT: ldrh r6, [r0, #22]
-; CHECK-NEXT: lsls r6, r6, #16
-; CHECK-NEXT: lsls r5, r5, #16
-; CHECK-NEXT: lsls r4, r4, #16
-; CHECK-NEXT: lsls r2, r2, #16
-; CHECK-NEXT: mov r7, r1
-; CHECK-NEXT: adds r7, #32
-; CHECK-NEXT: stm r7!, {r2, r4, r5, r6}
-; CHECK-NEXT: subs r3, r3, #3
-; CHECK-NEXT: adds r1, #48
-; CHECK-NEXT: adds r0, #24
-; CHECK-NEXT: .LBB1_8: @ %while.body.prol.loopexit
-; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
-; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
-; CHECK-NEXT: cmp r2, #3
-; CHECK-NEXT: blo .LBB1_12
-; CHECK-NEXT: .LBB1_9: @ %while.body.preheader1
-; CHECK-NEXT: adds r0, #30
-; CHECK-NEXT: .LBB1_10: @ %while.body
+; CHECK-NEXT: blo .LBB1_6
+; CHECK-NEXT: .LBB1_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: subs r2, #30
-; CHECK-NEXT: ldrh r2, [r2]
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: subs r4, #28
-; CHECK-NEXT: ldrh r4, [r4]
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: subs r5, #26
-; CHECK-NEXT: ldrh r5, [r5]
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: subs r6, #24
-; CHECK-NEXT: ldrh r6, [r6]
+; CHECK-NEXT: ldrh r2, [r0]
+; CHECK-NEXT: ldrh r4, [r0, #2]
+; CHECK-NEXT: ldrh r5, [r0, #4]
+; CHECK-NEXT: ldrh r6, [r0, #6]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #12]
; CHECK-NEXT: lsls r5, r5, #16
@@ -516,18 +464,10 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: str r4, [r1, #4]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1]
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: subs r2, #22
-; CHECK-NEXT: ldrh r2, [r2]
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: subs r4, #20
-; CHECK-NEXT: ldrh r4, [r4]
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: subs r5, #18
-; CHECK-NEXT: ldrh r5, [r5]
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: subs r6, #16
-; CHECK-NEXT: ldrh r6, [r6]
+; CHECK-NEXT: ldrh r2, [r0, #8]
+; CHECK-NEXT: ldrh r4, [r0, #10]
+; CHECK-NEXT: ldrh r5, [r0, #12]
+; CHECK-NEXT: ldrh r6, [r0, #14]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #28]
; CHECK-NEXT: lsls r5, r5, #16
@@ -536,18 +476,10 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: str r4, [r1, #20]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #16]
-; CHECK-NEXT: mov r2, r0
-; CHECK-NEXT: subs r2, #14
-; CHECK-NEXT: ldrh r2, [r2]
-; CHECK-NEXT: mov r4, r0
-; CHECK-NEXT: subs r4, #12
-; CHECK-NEXT: ldrh r4, [r4]
-; CHECK-NEXT: mov r5, r0
-; CHECK-NEXT: subs r5, #10
-; CHECK-NEXT: ldrh r5, [r5]
-; CHECK-NEXT: mov r6, r0
-; CHECK-NEXT: subs r6, #8
-; CHECK-NEXT: ldrh r6, [r6]
+; CHECK-NEXT: ldrh r2, [r0, #16]
+; CHECK-NEXT: ldrh r4, [r0, #18]
+; CHECK-NEXT: ldrh r5, [r0, #20]
+; CHECK-NEXT: ldrh r6, [r0, #22]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #44]
; CHECK-NEXT: lsls r5, r5, #16
@@ -556,13 +488,10 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: str r4, [r1, #36]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #32]
-; CHECK-NEXT: subs r2, r0, #6
-; CHECK-NEXT: ldrh r2, [r2]
-; CHECK-NEXT: subs r4, r0, #4
-; CHECK-NEXT: ldrh r4, [r4]
-; CHECK-NEXT: subs r5, r0, #2
-; CHECK-NEXT: ldrh r5, [r5]
-; CHECK-NEXT: ldrh r6, [r0]
+; CHECK-NEXT: ldrh r2, [r0, #24]
+; CHECK-NEXT: ldrh r4, [r0, #26]
+; CHECK-NEXT: ldrh r5, [r0, #28]
+; CHECK-NEXT: ldrh r6, [r0, #30]
; CHECK-NEXT: lsls r6, r6, #16
; CHECK-NEXT: str r6, [r1, #60]
; CHECK-NEXT: lsls r5, r5, #16
@@ -572,34 +501,74 @@ define void @arm_q15_to_q31_altorder(ptr nocapture noundef readonly %pSrc, ptr n
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #48]
; CHECK-NEXT: adds r1, #64
-; CHECK-NEXT: adds r0, #32
; CHECK-NEXT: subs r3, r3, #4
-; CHECK-NEXT: bne .LBB1_10
-; CHECK-NEXT: @ %bb.11: @ %while.end.loopexit
-; CHECK-NEXT: subs r0, #30
-; CHECK-NEXT: .LBB1_12: @ %while.end
+; CHECK-NEXT: adds r0, #32
+; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: bne .LBB1_5
+; CHECK-NEXT: .LBB1_6: @ %while.end
; CHECK-NEXT: movs r2, #3
; CHECK-NEXT: ands r7, r2
-; CHECK-NEXT: beq .LBB1_16
-; CHECK-NEXT: @ %bb.13: @ %while.body12
+; CHECK-NEXT: beq .LBB1_10
+; CHECK-NEXT: @ %bb.7: @ %while.body12
; CHECK-NEXT: ldrh r2, [r0]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1]
; CHECK-NEXT: cmp r7, #1
-; CHECK-NEXT: beq .LBB1_16
-; CHECK-NEXT: @ %bb.14: @ %while.body12.1
+; CHECK-NEXT: beq .LBB1_10
+; CHECK-NEXT: @ %bb.8: @ %while.body12.1
; CHECK-NEXT: ldrh r2, [r0, #2]
; CHECK-NEXT: lsls r2, r2, #16
; CHECK-NEXT: str r2, [r1, #4]
; CHECK-NEXT: cmp r7, #2
-; CHECK-NEXT: beq .LBB1_16
-; CHECK-NEXT: @ %bb.15: @ %while.body12.2
+; CHECK-NEXT: beq .LBB1_10
+; CHECK-NEXT: @ %bb.9: @ %while.body12.2
; CHECK-NEXT: ldrh r0, [r0, #4]
; CHECK-NEXT: lsls r0, r0, #16
; CHECK-NEXT: str r0, [r1, #8]
-; CHECK-NEXT: .LBB1_16: @ %while.end17
+; CHECK-NEXT: .LBB1_10: @ %while.end17
; CHECK-NEXT: add sp, #8
; CHECK-NEXT: pop {r4, r5, r6, r7, pc}
+; CHECK-NEXT: .LBB1_11: @ %while.body.prol.1
+; CHECK-NEXT: ldrh r2, [r0, #8]
+; CHECK-NEXT: ldrh r4, [r0, #10]
+; CHECK-NEXT: ldrh r6, [r0, #12]
+; CHECK-NEXT: ldrh r7, [r0, #14]
+; CHECK-NEXT: lsls r7, r7, #16
+; CHECK-NEXT: lsls r6, r6, #16
+; CHECK-NEXT: lsls r4, r4, #16
+; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: str r2, [r1, #16]
+; CHECK-NEXT: str r4, [r1, #20]
+; CHECK-NEXT: str r6, [r1, #24]
+; CHECK-NEXT: str r7, [r1, #28]
+; CHECK-NEXT: cmp r5, #2
+; CHECK-NEXT: bne .LBB1_13
+; CHECK-NEXT: @ %bb.12:
+; CHECK-NEXT: subs r3, r3, #2
+; CHECK-NEXT: adds r1, #32
+; CHECK-NEXT: adds r0, #16
+; CHECK-NEXT: b .LBB1_14
+; CHECK-NEXT: .LBB1_13: @ %while.body.prol.2
+; CHECK-NEXT: ldrh r2, [r0, #16]
+; CHECK-NEXT: ldrh r4, [r0, #18]
+; CHECK-NEXT: ldrh r5, [r0, #20]
+; CHECK-NEXT: ldrh r6, [r0, #22]
+; CHECK-NEXT: lsls r6, r6, #16
+; CHECK-NEXT: lsls r5, r5, #16
+; CHECK-NEXT: lsls r4, r4, #16
+; CHECK-NEXT: lsls r2, r2, #16
+; CHECK-NEXT: mov r7, r1
+; CHECK-NEXT: adds r7, #32
+; CHECK-NEXT: stm r7!, {r2, r4, r5, r6}
+; CHECK-NEXT: subs r3, r3, #3
+; CHECK-NEXT: adds r1, #48
+; CHECK-NEXT: adds r0, #24
+; CHECK-NEXT: .LBB1_14: @ %while.body.prol.loopexit
+; CHECK-NEXT: ldr r7, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: ldr r2, [sp] @ 4-byte Reload
+; CHECK-NEXT: cmp r2, #3
+; CHECK-NEXT: bhs .LBB1_5
+; CHECK-NEXT: b .LBB1_6
entry:
%cmp.not18 = icmp ult i32 %blockSize, 4
br i1 %cmp.not18, label %while.end, label %while.body.preheader
More information about the llvm-commits mailing list