[llvm] [LSR] Only apply postincrement discount on address uses (PR #149341)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 17 08:43:44 PDT 2025
llvmbot wrote:
@llvm/pr-subscribers-llvm-transforms
Author: John Brawn (john-brawn-arm)
Changes:
Cost::RateRegister currently applies the pre/post-increment discount to all uses whenever the addrec has the right form, meaning the discount is applied even when there is no load or store that could actually be post-incremented. Fix this by checking that the LSRUse kind is Address.
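For context, the discount models addressing modes where the pointer update folds into the memory access itself. A minimal hand-written illustration in Arm/Thumb-2 assembly (not taken from the patch) of the forms the check is aiming for:

```asm
@ Post-indexed load: loads from [r1], then advances r1 by 4 in the
@ same instruction, so no separate add is needed for the pointer.
ldr r0, [r1], #4

@ Pre-indexed load: advances r1 by 4 first, then loads from the new
@ address, again folding the pointer update into the load.
ldr r0, [r1, #4]!
```

When the use is not an address (e.g. the IV only feeds arithmetic or a compare), no such folding is possible, so the discount should not apply; that is what the new LSRUse::Address check enforces.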
---
Patch is 21.05 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149341.diff
4 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (+3-2)
- (modified) llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll (+37-33)
- (modified) llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll (+74-56)
- (added) llvm/test/Transforms/LoopStrengthReduce/ARM/cannot_pre_post_idx.ll (+149)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index dc8fa4379752f..0e5c96851d3a2 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -1420,8 +1420,9 @@ void Cost::RateRegister(const Formula &F, const SCEV *Reg,
}
unsigned LoopCost = 1;
- if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
- TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
+ if (LU.Kind == LSRUse::Address &&
+ (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
+ TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType()))) {
const SCEV *Start;
const SCEVConstant *Step;
if (match(AR, m_scev_AffineAddRec(m_SCEV(Start), m_SCEVConstant(Step))))
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
index 9c36bae6fac13..ec257bcf123f3 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/minloop.ll
@@ -6,77 +6,81 @@ define void @arm_min_q31(ptr nocapture readonly %pSrc, i32 %blockSize, ptr nocap
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r4, r5, r6, r7, r8, r9, r10, r11, lr}
; CHECK-NEXT: push.w {r4, r5, r6, r7, r8, r9, r10, r11, lr}
+; CHECK-NEXT: .pad #4
+; CHECK-NEXT: sub sp, #4
; CHECK-NEXT: ldr.w r12, [r0]
; CHECK-NEXT: subs.w r9, r1, #1
; CHECK-NEXT: beq .LBB0_3
; CHECK-NEXT: @ %bb.1: @ %while.body.preheader
-; CHECK-NEXT: and r8, r9, #3
+; CHECK-NEXT: and r6, r9, #3
; CHECK-NEXT: subs r7, r1, #2
; CHECK-NEXT: cmp r7, #3
; CHECK-NEXT: bhs .LBB0_4
; CHECK-NEXT: @ %bb.2:
-; CHECK-NEXT: movs r6, #0
-; CHECK-NEXT: b .LBB0_6
+; CHECK-NEXT: mov.w r10, #0
+; CHECK-NEXT: cbnz r6, .LBB0_7
+; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .LBB0_3:
-; CHECK-NEXT: movs r6, #0
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: b .LBB0_10
; CHECK-NEXT: .LBB0_4: @ %while.body.preheader.new
; CHECK-NEXT: bic r7, r9, #3
-; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: str r6, [sp] @ 4-byte Spill
; CHECK-NEXT: subs r7, #4
+; CHECK-NEXT: movs r6, #1
+; CHECK-NEXT: mov.w r8, #0
+; CHECK-NEXT: mov.w r10, #0
; CHECK-NEXT: add.w lr, r6, r7, lsr #2
-; CHECK-NEXT: movs r6, #0
-; CHECK-NEXT: movs r7, #4
; CHECK-NEXT: .LBB0_5: @ %while.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr r10, [r0, #16]!
-; CHECK-NEXT: sub.w r9, r9, #4
-; CHECK-NEXT: ldrd r5, r4, [r0, #-12]
-; CHECK-NEXT: ldr r11, [r0, #-4]
+; CHECK-NEXT: ldr r11, [r0, #16]!
+; CHECK-NEXT: ldrd r5, r7, [r0, #-12]
+; CHECK-NEXT: ldr r4, [r0, #-4]
; CHECK-NEXT: cmp r12, r5
-; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #3
; CHECK-NEXT: csel r5, r5, r12, gt
-; CHECK-NEXT: cmp r5, r4
+; CHECK-NEXT: csinc r6, r10, r8, le
+; CHECK-NEXT: cmp r5, r7
; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #2
-; CHECK-NEXT: csel r5, r4, r5, gt
-; CHECK-NEXT: cmp r5, r11
+; CHECK-NEXT: addgt.w r6, r8, #2
+; CHECK-NEXT: csel r7, r7, r5, gt
+; CHECK-NEXT: cmp r7, r4
; CHECK-NEXT: it gt
-; CHECK-NEXT: subgt r6, r7, #1
-; CHECK-NEXT: csel r5, r11, r5, gt
-; CHECK-NEXT: cmp r5, r10
-; CHECK-NEXT: csel r6, r7, r6, gt
-; CHECK-NEXT: add.w r7, r7, #4
-; CHECK-NEXT: csel r12, r10, r5, gt
+; CHECK-NEXT: addgt.w r6, r8, #3
+; CHECK-NEXT: csel r7, r4, r7, gt
+; CHECK-NEXT: add.w r8, r8, #4
+; CHECK-NEXT: cmp r7, r11
+; CHECK-NEXT: csel r10, r8, r6, gt
+; CHECK-NEXT: csel r12, r11, r7, gt
; CHECK-NEXT: le lr, .LBB0_5
-; CHECK-NEXT: .LBB0_6: @ %while.end.loopexit.unr-lcssa
-; CHECK-NEXT: cmp.w r8, #0
-; CHECK-NEXT: beq .LBB0_10
-; CHECK-NEXT: @ %bb.7: @ %while.body.epil
+; CHECK-NEXT: @ %bb.6: @ %while.end.loopexit.unr-lcssa.loopexit
+; CHECK-NEXT: ldr r6, [sp] @ 4-byte Reload
+; CHECK-NEXT: sub.w r9, r9, r8
+; CHECK-NEXT: cbz r6, .LBB0_10
+; CHECK-NEXT: .LBB0_7: @ %while.body.epil
; CHECK-NEXT: ldr r7, [r0, #4]
; CHECK-NEXT: sub.w r1, r1, r9
; CHECK-NEXT: cmp r12, r7
-; CHECK-NEXT: csel r6, r1, r6, gt
+; CHECK-NEXT: csel r10, r1, r10, gt
; CHECK-NEXT: csel r12, r7, r12, gt
-; CHECK-NEXT: cmp.w r8, #1
+; CHECK-NEXT: cmp r6, #1
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.8: @ %while.body.epil.1
; CHECK-NEXT: ldr r7, [r0, #8]
; CHECK-NEXT: cmp r12, r7
-; CHECK-NEXT: csinc r6, r6, r1, le
+; CHECK-NEXT: csinc r10, r10, r1, le
; CHECK-NEXT: csel r12, r7, r12, gt
-; CHECK-NEXT: cmp.w r8, #2
+; CHECK-NEXT: cmp r6, #2
; CHECK-NEXT: beq .LBB0_10
; CHECK-NEXT: @ %bb.9: @ %while.body.epil.2
; CHECK-NEXT: ldr r0, [r0, #12]
; CHECK-NEXT: cmp r12, r0
; CHECK-NEXT: it gt
-; CHECK-NEXT: addgt r6, r1, #2
+; CHECK-NEXT: addgt.w r10, r1, #2
; CHECK-NEXT: csel r12, r0, r12, gt
; CHECK-NEXT: .LBB0_10: @ %while.end
; CHECK-NEXT: str.w r12, [r2]
-; CHECK-NEXT: str r6, [r3]
+; CHECK-NEXT: str.w r10, [r3]
+; CHECK-NEXT: add sp, #4
; CHECK-NEXT: pop.w {r4, r5, r6, r7, r8, r9, r10, r11, pc}
entry:
%0 = load i32, ptr %pSrc, align 4
diff --git a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
index 96aff0233e4d9..9c8ef2ed899cf 100644
--- a/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-gather-scatter-ptr-address.ll
@@ -1,24 +1,30 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=thumbv8.1m.main-none-none-eabi -mattr=+mve.fp %s -o - | FileCheck %s
+; FIXME: Loop strength reduction makes suboptimal choices here due to the
+; isLSRCostLess function preferring to minimise the number of addrecs even
+; when it increases the total number of adds.
+
define void @ptr_iv_v4i32(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i32 %y) {
; CHECK-LABEL: ptr_iv_v4i32:
; CHECK: @ %bb.0: @ %vector.ph
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w lr, #249
; CHECK-NEXT: adr r3, .LCPI0_0
; CHECK-NEXT: vldrw.u32 q0, [r3]
+; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: .LBB0_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
-; CHECK-NEXT: adds r0, #64
+; CHECK-NEXT: add.w r4, r0, r12
+; CHECK-NEXT: add.w r3, r1, r12
+; CHECK-NEXT: vldrw.u32 q1, [r4, q0, uxtw #2]
+; CHECK-NEXT: add.w r12, r12, #64
; CHECK-NEXT: vadd.i32 q1, q1, r2
-; CHECK-NEXT: vstrw.32 q1, [r1, q0, uxtw #2]
-; CHECK-NEXT: adds r1, #64
+; CHECK-NEXT: vstrw.32 q1, [r3, q0, uxtw #2]
; CHECK-NEXT: le lr, .LBB0_1
; CHECK-NEXT: @ %bb.2: @ %end
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: .LCPI0_0:
@@ -110,21 +116,23 @@ end:
define void @ptr_iv_v8i16(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i16 %y) {
; CHECK-LABEL: ptr_iv_v8i16:
; CHECK: @ %bb.0: @ %vector.ph
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w lr, #249
; CHECK-NEXT: adr r3, .LCPI2_0
; CHECK-NEXT: vldrw.u32 q0, [r3]
+; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: .LBB2_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
-; CHECK-NEXT: adds r0, #64
+; CHECK-NEXT: add.w r4, r0, r12
+; CHECK-NEXT: add.w r3, r1, r12
+; CHECK-NEXT: vldrh.u16 q1, [r4, q0, uxtw #1]
+; CHECK-NEXT: add.w r12, r12, #64
; CHECK-NEXT: vadd.i16 q1, q1, r2
-; CHECK-NEXT: vstrh.16 q1, [r1, q0, uxtw #1]
-; CHECK-NEXT: adds r1, #64
+; CHECK-NEXT: vstrh.16 q1, [r3, q0, uxtw #1]
; CHECK-NEXT: le lr, .LBB2_1
; CHECK-NEXT: @ %bb.2: @ %end
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: .LCPI2_0:
@@ -164,23 +172,25 @@ end:
define void @ptr_iv_v8i16_mult(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i16 %y) {
; CHECK-LABEL: ptr_iv_v8i16_mult:
; CHECK: @ %bb.0: @ %vector.ph
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w lr, #249
; CHECK-NEXT: adr.w r12, .LCPI3_0
-; CHECK-NEXT: adr r3, .LCPI3_1
-; CHECK-NEXT: vldrw.u32 q0, [r3]
-; CHECK-NEXT: vldrw.u32 q1, [r12]
+; CHECK-NEXT: adr r4, .LCPI3_1
+; CHECK-NEXT: vldrw.u32 q0, [r12]
+; CHECK-NEXT: vldrw.u32 q1, [r4]
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB3_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q2, [r0, q0]
-; CHECK-NEXT: adds r0, #64
+; CHECK-NEXT: adds r4, r0, r3
+; CHECK-NEXT: add.w r12, r1, r3
+; CHECK-NEXT: vldrh.u16 q2, [r4, q1]
+; CHECK-NEXT: adds r3, #64
; CHECK-NEXT: vadd.i16 q2, q2, r2
-; CHECK-NEXT: vstrh.16 q2, [r1, q1]
-; CHECK-NEXT: adds r1, #64
+; CHECK-NEXT: vstrh.16 q2, [r12, q0]
; CHECK-NEXT: le lr, .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %end
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: .LCPI3_0:
@@ -230,21 +240,23 @@ end:
define void @ptr_iv_v16i8(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i8 %y) {
; CHECK-LABEL: ptr_iv_v16i8:
; CHECK: @ %bb.0: @ %vector.ph
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w lr, #249
; CHECK-NEXT: adr r3, .LCPI4_0
; CHECK-NEXT: vldrw.u32 q0, [r3]
+; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: .LBB4_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrb.u8 q1, [r0, q0]
-; CHECK-NEXT: adds r0, #64
+; CHECK-NEXT: add.w r4, r0, r12
+; CHECK-NEXT: add.w r3, r1, r12
+; CHECK-NEXT: vldrb.u8 q1, [r4, q0]
+; CHECK-NEXT: add.w r12, r12, #64
; CHECK-NEXT: vadd.i8 q1, q1, r2
-; CHECK-NEXT: vstrb.8 q1, [r1, q0]
-; CHECK-NEXT: adds r1, #64
+; CHECK-NEXT: vstrb.8 q1, [r3, q0]
; CHECK-NEXT: le lr, .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %end
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: .LCPI4_0:
@@ -292,23 +304,25 @@ end:
define void @ptr_iv_v16i8_mult(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, i8 %y) {
; CHECK-LABEL: ptr_iv_v16i8_mult:
; CHECK: @ %bb.0: @ %vector.ph
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w lr, #249
; CHECK-NEXT: adr.w r12, .LCPI5_0
-; CHECK-NEXT: adr r3, .LCPI5_1
-; CHECK-NEXT: vldrw.u32 q0, [r3]
-; CHECK-NEXT: vldrw.u32 q1, [r12]
+; CHECK-NEXT: adr r4, .LCPI5_1
+; CHECK-NEXT: vldrw.u32 q0, [r12]
+; CHECK-NEXT: vldrw.u32 q1, [r4]
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB5_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrb.u8 q2, [r0, q0]
-; CHECK-NEXT: adds r0, #64
+; CHECK-NEXT: adds r4, r0, r3
+; CHECK-NEXT: add.w r12, r1, r3
+; CHECK-NEXT: vldrb.u8 q2, [r4, q1]
+; CHECK-NEXT: adds r3, #64
; CHECK-NEXT: vadd.i8 q2, q2, r2
-; CHECK-NEXT: vstrb.8 q2, [r1, q1]
-; CHECK-NEXT: adds r1, #64
+; CHECK-NEXT: vstrb.8 q2, [r12, q0]
; CHECK-NEXT: le lr, .LBB5_1
; CHECK-NEXT: @ %bb.2: @ %end
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: .LCPI5_0:
@@ -374,21 +388,23 @@ end:
define void @ptr_iv_v4f32(ptr noalias nocapture readonly %A, ptr noalias nocapture %B, float %y) {
; CHECK-LABEL: ptr_iv_v4f32:
; CHECK: @ %bb.0: @ %vector.ph
-; CHECK-NEXT: .save {r7, lr}
-; CHECK-NEXT: push {r7, lr}
+; CHECK-NEXT: .save {r4, lr}
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: mov.w lr, #249
; CHECK-NEXT: adr r3, .LCPI6_0
; CHECK-NEXT: vldrw.u32 q0, [r3]
+; CHECK-NEXT: mov.w r12, #0
; CHECK-NEXT: .LBB6_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrw.u32 q1, [r0, q0, uxtw #2]
-; CHECK-NEXT: adds r0, #64
+; CHECK-NEXT: add.w r4, r0, r12
+; CHECK-NEXT: add.w r3, r1, r12
+; CHECK-NEXT: vldrw.u32 q1, [r4, q0, uxtw #2]
+; CHECK-NEXT: add.w r12, r12, #64
; CHECK-NEXT: vadd.f32 q1, q1, r2
-; CHECK-NEXT: vstrw.32 q1, [r1, q0, uxtw #2]
-; CHECK-NEXT: adds r1, #64
+; CHECK-NEXT: vstrw.32 q1, [r3, q0, uxtw #2]
; CHECK-NEXT: le lr, .LBB6_1
; CHECK-NEXT: @ %bb.2: @ %end
-; CHECK-NEXT: pop {r7, pc}
+; CHECK-NEXT: pop {r4, pc}
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: @ %bb.3:
; CHECK-NEXT: .LCPI6_0:
@@ -485,16 +501,18 @@ define void @ptr_iv_v8f16(ptr noalias nocapture readonly %A, ptr noalias nocaptu
; CHECK-NEXT: vmov s0, r2
; CHECK-NEXT: mov.w lr, #249
; CHECK-NEXT: vcvtb.f16.f32 s0, s0
-; CHECK-NEXT: adr r3, .LCPI8_0
-; CHECK-NEXT: vmov.f16 r2, s0
-; CHECK-NEXT: vldrw.u32 q0, [r3]
+; CHECK-NEXT: adr r2, .LCPI8_0
+; CHECK-NEXT: vmov.f16 r12, s0
+; CHECK-NEXT: vldrw.u32 q0, [r2]
+; CHECK-NEXT: movs r3, #0
; CHECK-NEXT: .LBB8_1: @ %vector.body
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: vldrh.u16 q1, [r0, q0, uxtw #1]
-; CHECK-NEXT: adds r0, #64
-; CHECK-NEXT: vadd.f16 q1, q1, r2
-; CHECK-NEXT: vstrh.16 q1, [r1, q0, uxtw #1]
-; CHECK-NEXT: adds r1, #64
+; CHECK-NEXT: adds r2, r0, r3
+; CHECK-NEXT: vldrh.u16 q1, [r2, q0, uxtw #1]
+; CHECK-NEXT: adds r2, r1, r3
+; CHECK-NEXT: adds r3, #64
+; CHECK-NEXT: vadd.f16 q1, q1, r12
+; CHECK-NEXT: vstrh.16 q1, [r2, q0, uxtw #1]
; CHECK-NEXT: le lr, .LBB8_1
; CHECK-NEXT: @ %bb.2: @ %end
; CHECK-NEXT: pop {r7, pc}
diff --git a/llvm/test/Transforms/LoopStrengthReduce/ARM/cannot_pre_post_idx.ll b/llvm/test/Transforms/LoopStrengthReduce/ARM/cannot_pre_post_idx.ll
new file mode 100644
index 0000000000000..96dd9a503cc64
--- /dev/null
+++ b/llvm/test/Transforms/LoopStrengthReduce/ARM/cannot_pre_post_idx.ll
@@ -0,0 +1,149 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
+; RUN: opt < %s -loop-reduce -lsr-preferred-addressing-mode=none -S | FileCheck %s --check-prefixes=CHECK,CHECK-NONE
+; RUN: opt < %s -loop-reduce -lsr-preferred-addressing-mode=preindexed -S | FileCheck %s --check-prefixes=CHECK,CHECK-PREINDEXED
+; RUN: opt < %s -loop-reduce -lsr-preferred-addressing-mode=postindexed -S | FileCheck %s --check-prefixes=CHECK,CHECK-POSTINDEXED
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7m-arm-none-eabi"
+
+; This is an example where we should always pre/postincrement, as it can be
+; folded into the load.
+define i32 @has_load(ptr %p, i32 %n) {
+; CHECK-NONE-LABEL: define i32 @has_load(
+; CHECK-NONE-SAME: ptr [[P:%.*]], i32 [[N:%.*]]) {
+; CHECK-NONE-NEXT: entry:
+; CHECK-NONE-NEXT: br label [[LOOP:%.*]]
+; CHECK-NONE: loop:
+; CHECK-NONE-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[P]], [[ENTRY:%.*]] ]
+; CHECK-NONE-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ [[N]], [[ENTRY]] ]
+; CHECK-NONE-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NONE-NEXT: [[LOAD:%.*]] = load i32, ptr [[LSR_IV1]], align 4
+; CHECK-NONE-NEXT: [[ADD]] = add i32 [[ACC]], [[LOAD]]
+; CHECK-NONE-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
+; CHECK-NONE-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-NONE-NEXT: [[COND:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
+; CHECK-NONE-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-NONE: exit:
+; CHECK-NONE-NEXT: ret i32 [[ACC]]
+;
+; CHECK-PREINDEXED-LABEL: define i32 @has_load(
+; CHECK-PREINDEXED-SAME: ptr [[P:%.*]], i32 [[N:%.*]]) {
+; CHECK-PREINDEXED-NEXT: entry:
+; CHECK-PREINDEXED-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[P]], i32 -4
+; CHECK-PREINDEXED-NEXT: br label [[LOOP:%.*]]
+; CHECK-PREINDEXED: loop:
+; CHECK-PREINDEXED-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP2:%.*]], [[LOOP]] ], [ [[SCEVGEP]], [[ENTRY:%.*]] ]
+; CHECK-PREINDEXED-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ [[N]], [[ENTRY]] ]
+; CHECK-PREINDEXED-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-PREINDEXED-NEXT: [[SCEVGEP3:%.*]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-PREINDEXED-NEXT: [[LOAD:%.*]] = load i32, ptr [[SCEVGEP3]], align 4
+; CHECK-PREINDEXED-NEXT: [[ADD]] = add i32 [[ACC]], [[LOAD]]
+; CHECK-PREINDEXED-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
+; CHECK-PREINDEXED-NEXT: [[SCEVGEP2]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-PREINDEXED-NEXT: [[COND:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
+; CHECK-PREINDEXED-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-PREINDEXED: exit:
+; CHECK-PREINDEXED-NEXT: ret i32 [[ACC]]
+;
+; CHECK-POSTINDEXED-LABEL: define i32 @has_load(
+; CHECK-POSTINDEXED-SAME: ptr [[P:%.*]], i32 [[N:%.*]]) {
+; CHECK-POSTINDEXED-NEXT: entry:
+; CHECK-POSTINDEXED-NEXT: br label [[LOOP:%.*]]
+; CHECK-POSTINDEXED: loop:
+; CHECK-POSTINDEXED-NEXT: [[LSR_IV1:%.*]] = phi ptr [ [[SCEVGEP:%.*]], [[LOOP]] ], [ [[P]], [[ENTRY:%.*]] ]
+; CHECK-POSTINDEXED-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[LOOP]] ], [ [[N]], [[ENTRY]] ]
+; CHECK-POSTINDEXED-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-POSTINDEXED-NEXT: [[LOAD:%.*]] = load i32, ptr [[LSR_IV1]], align 4
+; CHECK-POSTINDEXED-NEXT: [[ADD]] = add i32 [[ACC]], [[LOAD]]
+; CHECK-POSTINDEXED-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], -1
+; CHECK-POSTINDEXED-NEXT: [[SCEVGEP]] = getelementptr i8, ptr [[LSR_IV1]], i32 4
+; CHECK-POSTINDEXED-NEXT: [[COND:%.*]] = icmp eq i32 [[LSR_IV_NEXT]], 0
+; CHECK-POSTINDEXED-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK-POSTINDEXED: exit:
+; CHECK-POSTINDEXED-NEXT: ret i32 [[ACC]]
+;
+entry:
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop ]
+ %acc = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %gep = getelementptr i32, ptr %p, i32 %idx
+ %load = load i32, ptr %gep, align 4
+ %add = add i32 %acc, %load
+ %idx.next = add nuw i32 %idx, 1
+ %cond = icmp eq i32 %idx.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret i32 %acc
+}
+
+; Here there's no load, so there's nothing to fold a pre/postincrement into.
+define i32 @no_mem_access(i32 %n) {
+; CHECK-LABEL: define i32 @no_mem_access(
+; CHECK-SAME: i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEXT: [[IDX:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[IDX_NEXT:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[ACC:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ADD:%.*]], [[LOOP]] ]
+; CHECK-NEXT: [[ADD]] = add i32 [[ACC]], [[IDX]]
+; CHECK-NEXT: [[IDX_NEXT]] = add nuw i32 [[IDX]], 1
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[N]], [[IDX_NEXT]]
+; CHECK-NEXT: br i1 [[COND]], label [[EXIT:%.*]], label [[LOOP]]
+; CHECK: exit:
+; CHECK-NEXT: ret i32 [[ACC]]
+;
+entry:
+ br label %loop
+
+loop:
+ %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop ]
+ %acc = phi i32 [ 0, %entry ], [ %add, %loop ]
+ %add = add i32 %acc, %idx
+ %idx.next = add nuw i32 %idx, 1
+ %cond = icmp eq i32 %idx.next, %n
+ br i1 %cond, label %exit, label %loop
+
+exit:
+ ret i32 %acc
+}
+
+; Here there's a load, but the address is generated in a way that means
+; pre/postincrement isn't possible.
+define i32 @has_load_bad_addr(ptr %p, i32 %n) {
+; CHECK-LABEL: define i32 @has_load_bad_addr(
+; CHECK-SAME: ptr [[P:%.*]], i32 [[N:%.*]]) {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label [[LOOP:%.*]]
+; CHECK: loop:
+; CHECK-NEX...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/149341