[llvm-branch-commits] [LSR] Preserve LCSSA in SCEVRewriter (PR #191665)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Sat Apr 11 13:58:20 PDT 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Aiden Grossman (boomanaiden154)
<details>
<summary>Changes</summary>
This is necessary to fix some regressions when switching to the NewPM
and seems to improve optimization quality in some cases due to LSR
currently not understanding loop nests. This patch just enables LCSSA
preservation for SCEVRewriter and updates all the relevant tests. There
are some further fixes that are needed to get this fully working that
will be included in follow-up patches.
Similar to #<!-- -->185373 (although without follow-up fixes and a regression
test).
The regression test added for the specific NewPM case noticed is in
Transforms/LoopStrengthReduce/X86/lcssa-preservation-regression.ll
(does not reproduce without the target triple).
---
Patch is 109.07 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/191665.diff
43 Files Affected:
- (modified) llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (+1-1)
- (modified) llvm/test/Analysis/ScalarEvolution/pr62430.ll (+3-5)
- (modified) llvm/test/CodeGen/AArch64/ragreedy-csr.ll (+111-116)
- (modified) llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll (+2-2)
- (modified) llvm/test/CodeGen/X86/lsr-addrecloops.ll (+34-37)
- (modified) llvm/test/Transforms/LoopStrengthReduce/2011-12-19-PostincQuadratic.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/AArch64/postinc-with-fixups-with-different-loops.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-invalid-ptr-extend.ll (+8-6)
- (modified) llvm/test/Transforms/LoopStrengthReduce/ARM/illegal-addr-modes.ll (+3-2)
- (modified) llvm/test/Transforms/LoopStrengthReduce/Power/incomplete-phi.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/RISCV/icmp-zero.ll (+1-3)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll (+7-8)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/2011-12-04-loserreg.ll (-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/bin_power.ll (+12-6)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/debuginfo-scev-salvage-ptrtoaddr.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/eh-insertion-point-2.ll (+4-3)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/eh-insertion-point.ll (+4-3)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/expander-crashes.ll (+3-2)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/icmp-zero-offset-overflow.ll (+4-2)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/incorrect-offset-scaling.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/lsr-expand-quadratic.ll (+6-6)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/lsr-overflow.ll (+1-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/nested-ptr-addrec.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/normalization-during-scev-expansion.ll (+70-37)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/postinc-iv-used-by-urem-and-udiv.ll (+34-22)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/pr40514.ll (+2-6)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll (+6-3)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/pr47776-do-not-apply-info-from-guards-to-addrecs.ll (+6-5)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/pr62660-normalization-failure.ll (+3-2)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/sibling-loops.ll (+5-2)
- (modified) llvm/test/Transforms/LoopStrengthReduce/X86/zext-signed-addrec.ll (+4-3)
- (modified) llvm/test/Transforms/LoopStrengthReduce/duplicated-phis.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/lsr-overflow.ll (+2-5)
- (modified) llvm/test/Transforms/LoopStrengthReduce/nonintegral.ll (+5-8)
- (modified) llvm/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/pr12691.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/pr25541.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/pr27056.ll (+6-4)
- (modified) llvm/test/Transforms/LoopStrengthReduce/scaling-factor-incompat-type.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/scev-incorrect-nuw-inference.ll (+2-1)
- (modified) llvm/test/Transforms/LoopStrengthReduce/uglygep-address-space.ll (+3-5)
- (modified) llvm/test/Transforms/LoopStrengthReduce/uglygep.ll (+5-8)
- (modified) llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll (+16-10)
``````````diff
diff --git a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 5421cad31c3ba..e505b31752441 100644
--- a/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -6157,7 +6157,7 @@ LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
? PreferredAddresingMode
: TTI.getPreferredAddressingMode(L, &SE)),
- Rewriter(SE, "lsr", false), BaselineCost(L, SE, TTI, AMK) {
+ Rewriter(SE, "lsr"), BaselineCost(L, SE, TTI, AMK) {
// If LoopSimplify form is not available, stay out of trouble.
if (!L->isLoopSimplifyForm())
return;
diff --git a/llvm/test/Analysis/ScalarEvolution/pr62430.ll b/llvm/test/Analysis/ScalarEvolution/pr62430.ll
index a3e3b8ff5c11a..a8e91b34b65d0 100644
--- a/llvm/test/Analysis/ScalarEvolution/pr62430.ll
+++ b/llvm/test/Analysis/ScalarEvolution/pr62430.ll
@@ -13,14 +13,12 @@ define void @test() {
; CHECK-NEXT: [[LSR_IV:%.*]] = phi i32 [ [[LSR_IV_NEXT:%.*]], [[BB7]] ], [ 16, [[BB]] ]
; CHECK-NEXT: br label [[BB4:%.*]]
; CHECK: bb4:
-; CHECK-NEXT: [[LSR_IV3:%.*]] = phi i32 [ [[LSR_IV_NEXT4:%.*]], [[BB6:%.*]] ], [ [[LSR_IV1]], [[BB3]] ]
-; CHECK-NEXT: br i1 true, label [[BB7]], label [[BB6]]
+; CHECK-NEXT: br i1 true, label [[BB7]], label [[BB6:%.*]]
; CHECK: bb6:
-; CHECK-NEXT: [[LSR_IV_NEXT4]] = add i32 [[LSR_IV3]], 268435456
; CHECK-NEXT: br label [[BB4]]
; CHECK: bb7:
-; CHECK-NEXT: [[MUL9:%.*]] = mul i32 [[LSR_IV3]], [[LSR_IV3]]
-; CHECK-NEXT: [[MUL10:%.*]] = mul i32 [[MUL9]], [[LSR_IV3]]
+; CHECK-NEXT: [[MUL9:%.*]] = mul i32 [[LSR_IV1]], [[LSR_IV1]]
+; CHECK-NEXT: [[MUL10:%.*]] = mul i32 [[MUL9]], [[LSR_IV1]]
; CHECK-NEXT: call void @foo(i32 [[MUL10]])
; CHECK-NEXT: [[SEXT:%.*]] = sext i32 [[MUL10]] to i64
; CHECK-NEXT: [[LSR_IV_NEXT]] = add i32 [[LSR_IV]], 32
diff --git a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
index 31f004e8d72b7..020b69f1a9512 100644
--- a/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
+++ b/llvm/test/CodeGen/AArch64/ragreedy-csr.ll
@@ -32,135 +32,136 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: Lloh0:
-; CHECK-NEXT: adrp x14, __DefaultRuneLocale at GOTPAGE
+; CHECK-NEXT: adrp x15, __DefaultRuneLocale at GOTPAGE
; CHECK-NEXT: ldrb w12, [x0, #4]
-; CHECK-NEXT: ldrb w13, [x1, #4]
-; CHECK-NEXT: ldr x9, [x0, #16]
-; CHECK-NEXT: ldr x10, [x1, #16]
+; CHECK-NEXT: ldrb w14, [x1, #4]
+; CHECK-NEXT: ldr x10, [x0, #16]
+; CHECK-NEXT: ldr x13, [x1, #16]
; CHECK-NEXT: mov x11, xzr
; CHECK-NEXT: Lloh1:
-; CHECK-NEXT: ldr x14, [x14, __DefaultRuneLocale at GOTPAGEOFF]
-; CHECK-NEXT: ldrsb x8, [x9, x11]
-; CHECK-NEXT: tbz x8, #63, LBB0_3
-; CHECK-NEXT: LBB0_2: ; %cond.false.i.i
-; CHECK-NEXT: stp x9, x0, [sp, #32] ; 16-byte Folded Spill
-; CHECK-NEXT: mov w0, w8
-; CHECK-NEXT: mov w1, #32768 ; =0x8000
-; CHECK-NEXT: str x10, [sp, #8] ; 8-byte Spill
-; CHECK-NEXT: str x11, [sp, #24] ; 8-byte Spill
-; CHECK-NEXT: str w12, [sp, #4] ; 4-byte Spill
-; CHECK-NEXT: str w13, [sp, #20] ; 4-byte Spill
-; CHECK-NEXT: bl ___maskrune
-; CHECK-NEXT: Lloh2:
-; CHECK-NEXT: adrp x14, __DefaultRuneLocale at GOTPAGE
-; CHECK-NEXT: mov w8, w0
-; CHECK-NEXT: Lloh3:
-; CHECK-NEXT: ldr x14, [x14, __DefaultRuneLocale at GOTPAGEOFF]
-; CHECK-NEXT: ldp x11, x9, [sp, #24] ; 16-byte Folded Reload
-; CHECK-NEXT: ldr w13, [sp, #20] ; 4-byte Reload
-; CHECK-NEXT: ldr w12, [sp, #4] ; 4-byte Reload
-; CHECK-NEXT: ldr x10, [sp, #8] ; 8-byte Reload
-; CHECK-NEXT: ldr x0, [sp, #40] ; 8-byte Reload
-; CHECK-NEXT: cbz w8, LBB0_4
-; CHECK-NEXT: b LBB0_6
-; CHECK-NEXT: LBB0_3: ; %cond.true.i.i
-; CHECK-NEXT: add x8, x14, x8, lsl #2
+; CHECK-NEXT: ldr x15, [x15, __DefaultRuneLocale at GOTPAGEOFF]
+; CHECK-NEXT: LBB0_2: ; %while.cond
+; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: add x9, x10, x11
+; CHECK-NEXT: ldrsb x8, [x9], #1
+; CHECK-NEXT: tbnz x8, #63, LBB0_8
+; CHECK-NEXT: ; %bb.3: ; %cond.true.i.i
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: add x8, x15, x8, lsl #2
; CHECK-NEXT: ldr w8, [x8, #60]
; CHECK-NEXT: and w8, w8, #0x8000
; CHECK-NEXT: cbnz w8, LBB0_6
; CHECK-NEXT: LBB0_4: ; %lor.rhs
-; CHECK-NEXT: ldrsb x8, [x10, x11]
-; CHECK-NEXT: tbnz x8, #63, LBB0_8
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: ldrsb x8, [x13, x11]
+; CHECK-NEXT: tbnz x8, #63, LBB0_9
; CHECK-NEXT: ; %bb.5: ; %cond.true.i.i217
-; CHECK-NEXT: add x8, x14, x8, lsl #2
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: add x8, x15, x8, lsl #2
; CHECK-NEXT: ldr w8, [x8, #60]
; CHECK-NEXT: and w8, w8, #0x8000
-; CHECK-NEXT: cbz w8, LBB0_9
+; CHECK-NEXT: cbz w8, LBB0_10
; CHECK-NEXT: LBB0_6: ; %while.body
-; CHECK-NEXT: ldrb w8, [x9, x11]
-; CHECK-NEXT: ldrb w15, [x10, x11]
-; CHECK-NEXT: cmp w8, w15
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: ldrb w8, [x10, x11]
+; CHECK-NEXT: ldrb w9, [x13, x11]
+; CHECK-NEXT: cmp w8, w9
; CHECK-NEXT: b.ne LBB0_42
; CHECK-NEXT: ; %bb.7: ; %if.end17
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: add x11, x11, #1
-; CHECK-NEXT: ldrsb x8, [x9, x11]
-; CHECK-NEXT: tbz x8, #63, LBB0_3
; CHECK-NEXT: b LBB0_2
-; CHECK-NEXT: LBB0_8: ; %cond.false.i.i219
+; CHECK-NEXT: LBB0_8: ; %cond.false.i.i
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
; CHECK-NEXT: stp x9, x0, [sp, #32] ; 16-byte Folded Spill
; CHECK-NEXT: mov w0, w8
; CHECK-NEXT: mov w1, #32768 ; =0x8000
-; CHECK-NEXT: str x10, [sp, #8] ; 8-byte Spill
+; CHECK-NEXT: stp x10, x13, [sp, #8] ; 16-byte Folded Spill
; CHECK-NEXT: str x11, [sp, #24] ; 8-byte Spill
-; CHECK-NEXT: str w12, [sp, #4] ; 4-byte Spill
-; CHECK-NEXT: str w13, [sp, #20] ; 4-byte Spill
+; CHECK-NEXT: stp w14, w12, [sp] ; 8-byte Folded Spill
+; CHECK-NEXT: bl ___maskrune
+; CHECK-NEXT: Lloh2:
+; CHECK-NEXT: adrp x15, __DefaultRuneLocale at GOTPAGE
+; CHECK-NEXT: mov w8, w0
+; CHECK-NEXT: Lloh3:
+; CHECK-NEXT: ldr x15, [x15, __DefaultRuneLocale at GOTPAGEOFF]
+; CHECK-NEXT: ldp x13, x11, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp w14, w12, [sp] ; 8-byte Folded Reload
+; CHECK-NEXT: ldp x9, x0, [sp, #32] ; 16-byte Folded Reload
+; CHECK-NEXT: ldr x10, [sp, #8] ; 8-byte Reload
+; CHECK-NEXT: cbz w8, LBB0_4
+; CHECK-NEXT: b LBB0_6
+; CHECK-NEXT: LBB0_9: ; %cond.false.i.i219
+; CHECK-NEXT: ; in Loop: Header=BB0_2 Depth=1
+; CHECK-NEXT: stp x9, x0, [sp, #32] ; 16-byte Folded Spill
+; CHECK-NEXT: mov w0, w8
+; CHECK-NEXT: mov w1, #32768 ; =0x8000
+; CHECK-NEXT: stp x10, x13, [sp, #8] ; 16-byte Folded Spill
+; CHECK-NEXT: str x11, [sp, #24] ; 8-byte Spill
+; CHECK-NEXT: stp w14, w12, [sp] ; 8-byte Folded Spill
; CHECK-NEXT: bl ___maskrune
; CHECK-NEXT: Lloh4:
-; CHECK-NEXT: adrp x14, __DefaultRuneLocale at GOTPAGE
+; CHECK-NEXT: adrp x15, __DefaultRuneLocale at GOTPAGE
; CHECK-NEXT: mov w8, w0
; CHECK-NEXT: Lloh5:
-; CHECK-NEXT: ldr x14, [x14, __DefaultRuneLocale at GOTPAGEOFF]
-; CHECK-NEXT: ldp x11, x9, [sp, #24] ; 16-byte Folded Reload
-; CHECK-NEXT: ldr w13, [sp, #20] ; 4-byte Reload
-; CHECK-NEXT: ldr w12, [sp, #4] ; 4-byte Reload
+; CHECK-NEXT: ldr x15, [x15, __DefaultRuneLocale at GOTPAGEOFF]
+; CHECK-NEXT: ldp x13, x11, [sp, #16] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp w14, w12, [sp] ; 8-byte Folded Reload
+; CHECK-NEXT: ldp x9, x0, [sp, #32] ; 16-byte Folded Reload
; CHECK-NEXT: ldr x10, [sp, #8] ; 8-byte Reload
-; CHECK-NEXT: ldr x0, [sp, #40] ; 8-byte Reload
; CHECK-NEXT: cbnz w8, LBB0_6
-; CHECK-NEXT: LBB0_9: ; %while.end
-; CHECK-NEXT: orr w8, w13, w12
-; CHECK-NEXT: cbnz w8, LBB0_24
-; CHECK-NEXT: ; %bb.10: ; %if.then23
-; CHECK-NEXT: ldr x12, [x0, #16]
-; CHECK-NEXT: ldrb w8, [x9, x11]
-; CHECK-NEXT: ldrb w13, [x12]
-; CHECK-NEXT: cmp w13, #83
+; CHECK-NEXT: LBB0_10: ; %while.end
+; CHECK-NEXT: orr w15, w14, w12
+; CHECK-NEXT: add x8, x13, x11
+; CHECK-NEXT: cbnz w15, LBB0_24
+; CHECK-NEXT: ; %bb.11: ; %if.then23
+; CHECK-NEXT: ldr x14, [x0, #16]
+; CHECK-NEXT: ldrb w12, [x11, x10]
+; CHECK-NEXT: ldrb w15, [x14]
+; CHECK-NEXT: cmp w15, #83
; CHECK-NEXT: b.eq LBB0_19
-; CHECK-NEXT: LBB0_11: ; %while.cond59.preheader
-; CHECK-NEXT: cbz w8, LBB0_23
-; CHECK-NEXT: LBB0_12: ; %land.rhs.preheader
-; CHECK-NEXT: add x12, x9, x11
-; CHECK-NEXT: add x9, x10, x11
-; CHECK-NEXT: add x10, x12, #1
+; CHECK-NEXT: LBB0_12: ; %while.cond59.preheader
+; CHECK-NEXT: cbz w12, LBB0_23
; CHECK-NEXT: LBB0_13: ; %land.rhs
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldrb w11, [x9], #1
-; CHECK-NEXT: cbz w11, LBB0_23
+; CHECK-NEXT: ldrb w10, [x8], #1
+; CHECK-NEXT: cbz w10, LBB0_23
; CHECK-NEXT: ; %bb.14: ; %while.body66
; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmp w8, #42
+; CHECK-NEXT: cmp w12, #42
; CHECK-NEXT: b.eq LBB0_18
; CHECK-NEXT: ; %bb.15: ; %while.body66
; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmp w11, #42
+; CHECK-NEXT: cmp w10, #42
; CHECK-NEXT: b.eq LBB0_18
; CHECK-NEXT: ; %bb.16: ; %lor.lhs.false74
; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmp w8, w11
+; CHECK-NEXT: cmp w12, w10
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: b.ne LBB0_43
; CHECK-NEXT: ; %bb.17: ; %lor.lhs.false74
; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: cmp w8, #94
+; CHECK-NEXT: cmp w12, #94
; CHECK-NEXT: b.eq LBB0_43
; CHECK-NEXT: LBB0_18: ; %if.then83
; CHECK-NEXT: ; in Loop: Header=BB0_13 Depth=1
-; CHECK-NEXT: ldrb w8, [x10], #1
+; CHECK-NEXT: ldrb w12, [x9], #1
; CHECK-NEXT: mov w0, #1 ; =0x1
-; CHECK-NEXT: cbnz w8, LBB0_13
+; CHECK-NEXT: cbnz w12, LBB0_13
; CHECK-NEXT: b LBB0_43
; CHECK-NEXT: LBB0_19: ; %land.lhs.true28
-; CHECK-NEXT: cbz w8, LBB0_23
+; CHECK-NEXT: cbz w12, LBB0_23
; CHECK-NEXT: ; %bb.20: ; %land.lhs.true28
-; CHECK-NEXT: cmp w8, #112
-; CHECK-NEXT: b.ne LBB0_12
+; CHECK-NEXT: cmp w12, #112
+; CHECK-NEXT: b.ne LBB0_13
; CHECK-NEXT: ; %bb.21: ; %land.lhs.true35
-; CHECK-NEXT: ldrb w13, [x10, x11]
+; CHECK-NEXT: ldrb w13, [x11, x13]
; CHECK-NEXT: cmp w13, #112
-; CHECK-NEXT: b.ne LBB0_12
+; CHECK-NEXT: b.ne LBB0_13
; CHECK-NEXT: ; %bb.22: ; %land.lhs.true43
-; CHECK-NEXT: sub x12, x9, x12
-; CHECK-NEXT: add x12, x12, x11
-; CHECK-NEXT: cmp x12, #1
+; CHECK-NEXT: sub x13, x11, x14
+; CHECK-NEXT: add x13, x10, x13
+; CHECK-NEXT: cmp x13, #1
; CHECK-NEXT: b.ne LBB0_44
; CHECK-NEXT: LBB0_23:
; CHECK-NEXT: mov w0, #1 ; =0x1
@@ -169,77 +170,71 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
; CHECK-NEXT: cmp w12, #1
; CHECK-NEXT: b.ne LBB0_33
; CHECK-NEXT: ; %bb.25: ; %if.else88
-; CHECK-NEXT: cmp w13, #2
+; CHECK-NEXT: cmp w14, #2
; CHECK-NEXT: b.ne LBB0_33
; CHECK-NEXT: ; %bb.26: ; %while.cond95.preheader
-; CHECK-NEXT: ldrb w12, [x9, x11]
-; CHECK-NEXT: cbz w12, LBB0_23
+; CHECK-NEXT: ldrb w11, [x11, x10]
+; CHECK-NEXT: cbz w11, LBB0_23
; CHECK-NEXT: ; %bb.27: ; %land.rhs99.preheader
-; CHECK-NEXT: mov x8, xzr
+; CHECK-NEXT: mov x10, xzr
; CHECK-NEXT: mov w0, #1 ; =0x1
; CHECK-NEXT: b LBB0_29
; CHECK-NEXT: LBB0_28: ; %if.then117
; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1
-; CHECK-NEXT: add x12, x9, x8
-; CHECK-NEXT: add x8, x8, #1
-; CHECK-NEXT: add x12, x12, x11
-; CHECK-NEXT: ldrb w12, [x12, #1]
-; CHECK-NEXT: cbz w12, LBB0_43
+; CHECK-NEXT: ldrb w11, [x9, x10]
+; CHECK-NEXT: add x10, x10, #1
+; CHECK-NEXT: cbz w11, LBB0_43
; CHECK-NEXT: LBB0_29: ; %land.rhs99
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x13, x10, x8
-; CHECK-NEXT: ldrb w13, [x13, x11]
-; CHECK-NEXT: cbz w13, LBB0_23
+; CHECK-NEXT: ldrb w12, [x8, x10]
+; CHECK-NEXT: cbz w12, LBB0_23
; CHECK-NEXT: ; %bb.30: ; %while.body104
; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1
-; CHECK-NEXT: cmp w12, w13
+; CHECK-NEXT: cmp w11, w12
; CHECK-NEXT: b.eq LBB0_28
; CHECK-NEXT: ; %bb.31: ; %while.body104
; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1
-; CHECK-NEXT: cmp w12, #42
+; CHECK-NEXT: cmp w11, #42
; CHECK-NEXT: b.eq LBB0_28
; CHECK-NEXT: ; %bb.32: ; %while.body104
; CHECK-NEXT: ; in Loop: Header=BB0_29 Depth=1
-; CHECK-NEXT: cmp w13, #94
+; CHECK-NEXT: cmp w12, #94
; CHECK-NEXT: b.eq LBB0_28
; CHECK-NEXT: b LBB0_42
; CHECK-NEXT: LBB0_33: ; %if.else123
-; CHECK-NEXT: cmp w13, #1
+; CHECK-NEXT: cmp w14, #1
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: b.ne LBB0_43
; CHECK-NEXT: ; %bb.34: ; %if.else123
; CHECK-NEXT: cmp w12, #2
; CHECK-NEXT: b.ne LBB0_43
; CHECK-NEXT: ; %bb.35: ; %while.cond130.preheader
-; CHECK-NEXT: ldrb w8, [x9, x11]
-; CHECK-NEXT: cbz w8, LBB0_23
+; CHECK-NEXT: ldrb w10, [x11, x10]
+; CHECK-NEXT: cbz w10, LBB0_23
; CHECK-NEXT: ; %bb.36: ; %land.rhs134.preheader
-; CHECK-NEXT: mov x12, xzr
+; CHECK-NEXT: mov x11, xzr
; CHECK-NEXT: mov w0, #1 ; =0x1
; CHECK-NEXT: b LBB0_38
; CHECK-NEXT: LBB0_37: ; %if.then152
; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1
-; CHECK-NEXT: add x8, x9, x12
-; CHECK-NEXT: add x12, x12, #1
-; CHECK-NEXT: add x8, x8, x11
-; CHECK-NEXT: ldrb w8, [x8, #1]
-; CHECK-NEXT: cbz w8, LBB0_43
+; CHECK-NEXT: ldrb w10, [x9, x11]
+; CHECK-NEXT: add x11, x11, #1
+; CHECK-NEXT: cbz w10, LBB0_43
; CHECK-NEXT: LBB0_38: ; %land.rhs134
; CHECK-NEXT: ; =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: add x13, x10, x12
-; CHECK-NEXT: ldrb w13, [x13, x11]
-; CHECK-NEXT: cbz w13, LBB0_23
+; CHECK-NEXT: ldrb w12, [x8, x11]
+; CHECK-NEXT: cbz w12, LBB0_23
; CHECK-NEXT: ; %bb.39: ; %while.body139
; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1
-; CHECK-NEXT: cmp w8, w13
+; CHECK-NEXT: cmp w10, w12
; CHECK-NEXT: b.eq LBB0_37
; CHECK-NEXT: ; %bb.40: ; %while.body139
; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1
-; CHECK-NEXT: cmp w13, #42
+; CHECK-NEXT: cmp w12, #42
; CHECK-NEXT: b.eq LBB0_37
; CHECK-NEXT: ; %bb.41: ; %while.body139
; CHECK-NEXT: ; in Loop: Header=BB0_38 Depth=1
-; CHECK-NEXT: cmp w8, #94
+; CHECK-NEXT: cmp w10, #94
; CHECK-NEXT: b.eq LBB0_37
; CHECK-NEXT: LBB0_42:
; CHECK-NEXT: mov w0, wzr
@@ -248,17 +243,17 @@ define fastcc i32 @prune_match(ptr nocapture readonly %a, ptr nocapture readonly
; CHECK-NEXT: add sp, sp, #64
; CHECK-NEXT: ret
; CHECK-NEXT: LBB0_44: ; %lor.lhs.false47
-; CHECK-NEXT: cmp x12, #2
-; CHECK-NEXT: b.ne LBB0_11
+; CHECK-NEXT: cmp x13, #2
+; CHECK-NEXT: b.ne LBB0_12
; CHECK-NEXT: ; %bb.45: ; %land.lhs.true52
-; CHECK-NEXT: add x12, x9, x11
+; CHECK-NEXT: add x10, x11, x10
; CHECK-NEXT: mov w0, #1 ; =0x1
-; CHECK-NEXT: ldurb w12, [x12, #-1]
-; CHECK-NEXT: cmp w12, #73
+; CHECK-NEXT: ldurb w10, [x10, #-1]
+; CHECK-NEXT: cmp w10, #73
; CHECK-NEXT: b.eq LBB0_43
; CHECK-NEXT: ; %bb.46: ; %land.lhs.true52
-; CHECK-NEXT: cbz w8, LBB0_43
-; CHECK-NEXT: b LBB0_12
+; CHECK-NEXT: cbz w12, LBB0_43
+; CHECK-NEXT: b LBB0_13
; CHECK-NEXT: LBB0_47:
; CHECK-NEXT: mov w0, wzr
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
index 0795525fba1b3..2ad1e14bcf08f 100644
--- a/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
+++ b/llvm/test/CodeGen/ARM/arm-and-tst-peephole.ll
@@ -38,7 +38,7 @@ define ptr @foo(ptr %this, i32 %acc) nounwind readonly align 2 {
; ARM-NEXT: cmp r3, #2
; ARM-NEXT: bne .LBB0_1
; ARM-NEXT: @ %bb.6: @ %sw.bb8
-; ARM-NEXT: add r1, r1, r12
+; ARM-NEXT: add r1, r12, r1
; ARM-NEXT: add r0, r0, r1, lsl #2
; ARM-NEXT: mov pc, lr
;
@@ -78,7 +78,7 @@ define ptr @foo(ptr %this, i32 %acc) nounwind readonly align 2 {
; THUMB-NEXT: adds r0, r3, #4
; THUMB-NEXT: b .LBB0_8
; THUMB-NEXT: .LBB0_7: @ %sw.bb8
-; THUMB-NEXT: adds r1, r1, r2
+; THUMB-NEXT: adds r1, r2, r1
; THUMB-NEXT: lsls r1, r1, #2
; THUMB-NEXT: adds r0, r0, r1
; THUMB-NEXT: .LBB0_8: @ %sw.bb6
diff --git a/llvm/test/CodeGen/X86/lsr-addrecloops.ll b/llvm/test/CodeGen/X86/lsr-addrecloops.ll
index 98c8f587784c2..a5aaa721add20 100644
--- a/llvm/test/CodeGen/X86/lsr-addrecloops.ll
+++ b/llvm/test/CodeGen/X86/lsr-addrecloops.ll
@@ -13,25 +13,25 @@ define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr
; CHECK: # %bb.0: # %.preheader263
; CHECK-NEXT: leaq (,%rcx,4), %r9
; CHECK-NEXT: movl $1, %r10d
-; CHECK-NEXT: xorl %eax, %eax
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: movq %rdi, %rax
; CHECK-NEXT: jmp .LBB0_1
; CHECK-NEXT: .p2align 4
-; CHECK-NEXT: .LBB0_20: # in Loop: Header=BB0_1 Depth=1
+; CHECK-NEXT: .LBB0_18: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: incq %r10
; CHECK-NEXT: addq %r9, %rax
; CHECK-NEXT: cmpq %r10, %rcx
-; CHECK-NEXT: je .LBB0_18
+; CHECK-NEXT: je .LBB0_19
; CHECK-NEXT: .LBB0_1: # =>This Inner Loop Header: Depth=1
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vucomiss %xmm0, %xmm1
-; CHECK-NEXT: jne .LBB0_20
-; CHECK-NEXT: jp .LBB0_20
+; CHECK-NEXT: jne .LBB0_18
+; CHECK-NEXT: jp .LBB0_18
; CHECK-NEXT: # %bb.2: # in Loop: Header=BB0_1 Depth=1
; CHECK-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-NEXT: vucomiss %xmm0, %xmm1
-; CHECK-NEXT: jne .LBB0_20
-; CHECK-NEXT: jp .LBB0_20
+; CHECK-NEXT: jne .LBB0_18
+; CHECK-NEXT: jp .LBB0_18
; CHECK-NEXT: # %bb.3: # %vector.body807.preheader
; CHECK-NEXT: leaq 1(%rcx), %rdx
; CHECK-NEXT: movl %edx, %esi
@@ -49,15 +49,14 @@ define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_6: # %vector.body807
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: leaq (%rdi,%r9), %r11
-; CHECK-NEXT: vmovups %ymm0, (%rax,%r11)
-; CHECK-NEXT: vmovups %ymm0, 1(%rax,%r11)
-; CHECK-NEXT: vmovups %ymm0, 2(%rax,%r11)
-; CHECK-NEXT: vmovups %ymm0, 3(%rax,%r11)
-; CHECK-NEXT: vmovups %ymm0, 4(%rax,%r11)
-; CHECK-NEXT: vmovups %ymm0, 5(%rax,%r11)
-; CHECK-NEXT: vmovups %ymm0, 6(%rax,%r11)
-; CHECK-NEXT: vmovups %ymm0, 7(%rax,%r11)
+; CHECK-NEXT: vmovups %ymm0, (%rax,%r9)
+; CHECK-NEXT: vmovups %ymm0, 1(%rax,%r9)
+; CHECK-NEXT: vmovups %ymm0, 2(%rax,%r9)
+; CHECK-NEXT: vmovups %ymm0, 3(%rax,%r9)
+; CHECK-NEXT: vmovups %ymm0, 4(%rax,%r9)
+; CHECK-NEXT: vmovups %ymm0, 5(%rax,%r9)
+; CHECK-NEXT: vmovups %ymm0, 6(%rax,%r9)
+; CHECK-NEXT: vmovups %ymm0, 7(%rax,%r9)
; CHECK-NEXT: addq $8, %r9
; CHECK-NEXT: cmpq %r9, %r10
; CHECK-NEXT: jne .LBB0_6
@@ -65,25 +64,25 @@ define void @in4dob_(ptr nocapture writeonly %0, ptr nocapture readonly %1, ptr
; CHECK-NEXT: testq %rsi, %rsi
; CHECK-NEXT: je .LBB0_10
; CHECK-NEXT: # %bb.8: # %vector.body807.epil.preheader
-; CHECK-NEXT: addq %rdi, %r9
+; CHECK-NEXT: addq %rax, %r9
; CHECK-NEXT: xorl %r10d, %r10d
; CHECK-NEXT: vxorps %xmm0, %xmm0, %xmm0
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_9: # %vector.body807.epil
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: leaq (%r9,%r10), %r11
-; CHECK-NEXT: vmovups %ymm0, (%rax,%r11)
+; CHECK-NEXT: vmovups %ymm0, (%r9,%r10)
; CHECK-NEXT: incq %r10
; CHECK-NEXT: cmpq %r10, %rsi
; CHECK-NEXT: jne .LBB0_9
; CHECK-NEXT: .LBB0_10: # %.lr.ph373
; CHECK-NEXT: testb $1, %r8b
; CHECK-NEXT: je .LBB0_11
-; CHECK-NEXT: # %bb.19: # %scalar.ph839.preheader
+; CHECK-NEXT: # %bb.20: # %scalar.ph839.preheader
; CHECK-NEXT: movl $0, (%rdi)
; CHECK-NEXT: vzeroupper
; CHECK-N...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/191665
More information about the llvm-branch-commits
mailing list