[llvm] [polly] [SCEVExp] Use Builder.CreateBinOp in InsertBinop. (PR #154148)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Apr 2 01:56:59 PDT 2026
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/154148
>From 6d2dbdf38562e88d3abbc345d024a90814264cbb Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 18 Aug 2025 16:18:37 +0100
Subject: [PATCH 1/4] [SCEVExp] Use Builder.CreateBinOp in InsertBinop.
SCEVExpander's builder already uses InstSimplifyFolder. Use it to
construct binary ops via CreateBinOp instead of BinaryOperator::Create.
This helps to simplify away a few more instructions during SCEV
expansion.
---
.../Utils/ScalarEvolutionExpander.cpp | 20 ++---
llvm/test/CodeGen/AArch64/sink-and-fold.ll | 3 +-
.../LowOverheadLoops/no-dec-le-simple.ll | 22 +++---
llvm/test/CodeGen/X86/break-false-dep.ll | 76 +++++++++----------
.../expand-scev-expand-simplifications.ll | 5 +-
.../X86/2011-11-29-postincphi.ll | 11 ++-
.../LoopStrengthReduce/X86/pr46943.ll | 9 +--
.../LoopStrengthReduce/depth-limit-overrun.ll | 10 +--
.../lsr-rewrite-to-add-one.ll | 3 +-
.../LoopStrengthReduce/wrong-hoisting-iv.ll | 12 +--
10 files changed, 77 insertions(+), 94 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index ac60837584763..bbe4d16e759f6 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -337,16 +337,16 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// If we haven't found this binop, insert it.
- // TODO: Use the Builder, which will make CreateBinOp below fold with
- // InstSimplifyFolder.
- Instruction *BO = Builder.Insert(BinaryOperator::Create(Opcode, LHS, RHS));
- BO->setDebugLoc(Loc);
- if (Flags & SCEV::FlagNUW)
- BO->setHasNoUnsignedWrap();
- if (Flags & SCEV::FlagNSW)
- BO->setHasNoSignedWrap();
-
- return BO;
+ Value *Op = Builder.CreateBinOp(Opcode, LHS, RHS);
+ if (auto *BO = dyn_cast<Instruction>(Op)) {
+ BO->setDebugLoc(Loc);
+ if (Flags & SCEV::FlagNUW)
+ BO->setHasNoUnsignedWrap();
+ if (Flags & SCEV::FlagNSW)
+ BO->setHasNoSignedWrap();
+ }
+
+ return Op;
}
/// expandAddToGEP - Expand an addition expression with a pointer type into
diff --git a/llvm/test/CodeGen/AArch64/sink-and-fold.ll b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
index 4d383fefc43c7..a47026f2c095d 100644
--- a/llvm/test/CodeGen/AArch64/sink-and-fold.ll
+++ b/llvm/test/CodeGen/AArch64/sink-and-fold.ll
@@ -160,10 +160,9 @@ define void @f4(ptr %a, i64 %n) nounwind "target-features"="+alu-lsl-fast" {
; CHECK-NEXT: .LBB4_5: // %LJ.latch
; CHECK-NEXT: // in Loop: Header=BB4_6 Depth=2
; CHECK-NEXT: add x8, x21, #1
+; CHECK-NEXT: cmp x21, x19
; CHECK-NEXT: str w0, [x20, x21, lsl #2]
-; CHECK-NEXT: sub x9, x8, #1
; CHECK-NEXT: mov x21, x8
-; CHECK-NEXT: cmp x9, x19
; CHECK-NEXT: b.ge .LBB4_2
; CHECK-NEXT: .LBB4_6: // %LJ
; CHECK-NEXT: // Parent Loop BB4_3 Depth=1
diff --git a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
index 8e8934b6e9599..fb17345c6ecf0 100644
--- a/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
+++ b/llvm/test/CodeGen/Thumb2/LowOverheadLoops/no-dec-le-simple.ll
@@ -96,17 +96,18 @@ exit:
define void @cbz_exit_minsize(ptr %in, ptr %res) #0 {
; CHECK-LABEL: cbz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB3_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
+; CHECK-NEXT: ldr.w r4, [r0, r2, lsl #2]
+; CHECK-NEXT: mov r3, r2
; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: bne .LBB3_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: str r0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: str r3, [r1]
+; CHECK-NEXT: pop {r4, pc}
entry:
br label %loop
@@ -126,17 +127,18 @@ exit:
define void @cbnz_exit_minsize(ptr %in, ptr %res) #0 {
; CHECK-LABEL: cbnz_exit_minsize:
; CHECK: @ %bb.0: @ %entry
+; CHECK-NEXT: push {r4, lr}
; CHECK-NEXT: movs r2, #0
; CHECK-NEXT: .LBB4_1: @ %loop
; CHECK-NEXT: @ =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: ldr.w r3, [r0, r2, lsl #2]
+; CHECK-NEXT: ldr.w r4, [r0, r2, lsl #2]
+; CHECK-NEXT: mov r3, r2
; CHECK-NEXT: adds r2, #1
-; CHECK-NEXT: cmp r3, #0
+; CHECK-NEXT: cmp r4, #0
; CHECK-NEXT: beq .LBB4_1
; CHECK-NEXT: @ %bb.2: @ %exit
-; CHECK-NEXT: subs r0, r2, #1
-; CHECK-NEXT: str r0, [r1]
-; CHECK-NEXT: bx lr
+; CHECK-NEXT: str r3, [r1]
+; CHECK-NEXT: pop {r4, pc}
entry:
br label %loop
diff --git a/llvm/test/CodeGen/X86/break-false-dep.ll b/llvm/test/CodeGen/X86/break-false-dep.ll
index 6943622fac7f2..6df19767b2bb4 100644
--- a/llvm/test/CodeGen/X86/break-false-dep.ll
+++ b/llvm/test/CodeGen/X86/break-false-dep.ll
@@ -1308,28 +1308,27 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; SSE-LINUX-NEXT: .LBB13_1: # %inner_loop
; SSE-LINUX-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-LINUX-NEXT: movq %rcx, %r8
-; SSE-LINUX-NEXT: shrq $6, %r8
-; SSE-LINUX-NEXT: movq (%rsi,%r8,8), %r8
-; SSE-LINUX-NEXT: btq %rcx, %r8
-; SSE-LINUX-NEXT: leaq 1(%rcx), %rcx
+; SSE-LINUX-NEXT: shrq $6, %rcx
+; SSE-LINUX-NEXT: movq (%rsi,%rcx,8), %r9
+; SSE-LINUX-NEXT: leaq 1(%r8), %rcx
+; SSE-LINUX-NEXT: btq %r8, %r9
; SSE-LINUX-NEXT: jae .LBB13_1
; SSE-LINUX-NEXT: # %bb.2: # %loop_end
; SSE-LINUX-NEXT: # in Loop: Header=BB13_1 Depth=1
-; SSE-LINUX-NEXT: leaq 1(%rax), %r8
+; SSE-LINUX-NEXT: leaq 1(%rax), %r9
; SSE-LINUX-NEXT: xorps %xmm4, %xmm4
-; SSE-LINUX-NEXT: cvtsi2sd %r8, %xmm4
+; SSE-LINUX-NEXT: cvtsi2sd %r9, %xmm4
; SSE-LINUX-NEXT: movapd %xmm0, %xmm5
; SSE-LINUX-NEXT: subsd %xmm4, %xmm5
; SSE-LINUX-NEXT: mulsd %xmm1, %xmm5
-; SSE-LINUX-NEXT: leaq -1(%rcx), %r9
; SSE-LINUX-NEXT: xorps %xmm4, %xmm4
-; SSE-LINUX-NEXT: cvtsi2sd %r9, %xmm4
+; SSE-LINUX-NEXT: cvtsi2sd %r8, %xmm4
; SSE-LINUX-NEXT: mulsd %xmm2, %xmm4
; SSE-LINUX-NEXT: addsd %xmm5, %xmm4
; SSE-LINUX-NEXT: divsd %xmm3, %xmm4
; SSE-LINUX-NEXT: movsd %xmm4, -8(%rdi,%rax,8)
-; SSE-LINUX-NEXT: movq %r8, %rax
-; SSE-LINUX-NEXT: cmpq %r8, %rdx
+; SSE-LINUX-NEXT: movq %r9, %rax
+; SSE-LINUX-NEXT: cmpq %r9, %rdx
; SSE-LINUX-NEXT: jge .LBB13_1
; SSE-LINUX-NEXT: # %bb.3: # %loopdone
; SSE-LINUX-NEXT: retq
@@ -1380,28 +1379,27 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; SSE-WIN-NEXT: .LBB13_1: # %inner_loop
; SSE-WIN-NEXT: # =>This Inner Loop Header: Depth=1
; SSE-WIN-NEXT: movq %r9, %r10
-; SSE-WIN-NEXT: shrq $6, %r10
-; SSE-WIN-NEXT: movq (%rdx,%r10,8), %r10
-; SSE-WIN-NEXT: btq %r9, %r10
-; SSE-WIN-NEXT: leaq 1(%r9), %r9
+; SSE-WIN-NEXT: shrq $6, %r9
+; SSE-WIN-NEXT: movq (%rdx,%r9,8), %r11
+; SSE-WIN-NEXT: leaq 1(%r10), %r9
+; SSE-WIN-NEXT: btq %r10, %r11
; SSE-WIN-NEXT: jae .LBB13_1
; SSE-WIN-NEXT: # %bb.2: # %loop_end
; SSE-WIN-NEXT: # in Loop: Header=BB13_1 Depth=1
-; SSE-WIN-NEXT: leaq 1(%r8), %r10
+; SSE-WIN-NEXT: leaq 1(%r8), %r11
; SSE-WIN-NEXT: xorps %xmm4, %xmm4
-; SSE-WIN-NEXT: cvtsi2sd %r10, %xmm4
+; SSE-WIN-NEXT: cvtsi2sd %r11, %xmm4
; SSE-WIN-NEXT: movapd %xmm2, %xmm5
; SSE-WIN-NEXT: subsd %xmm4, %xmm5
; SSE-WIN-NEXT: mulsd %xmm3, %xmm5
-; SSE-WIN-NEXT: leaq -1(%r9), %r11
; SSE-WIN-NEXT: xorps %xmm4, %xmm4
-; SSE-WIN-NEXT: cvtsi2sd %r11, %xmm4
+; SSE-WIN-NEXT: cvtsi2sd %r10, %xmm4
; SSE-WIN-NEXT: mulsd %xmm1, %xmm4
; SSE-WIN-NEXT: addsd %xmm5, %xmm4
; SSE-WIN-NEXT: divsd %xmm0, %xmm4
; SSE-WIN-NEXT: movsd %xmm4, -8(%rcx,%r8,8)
-; SSE-WIN-NEXT: movq %r10, %r8
-; SSE-WIN-NEXT: cmpq %r10, %rax
+; SSE-WIN-NEXT: movq %r11, %r8
+; SSE-WIN-NEXT: cmpq %r11, %rax
; SSE-WIN-NEXT: jge .LBB13_1
; SSE-WIN-NEXT: # %bb.3: # %loopdone
; SSE-WIN-NEXT: movaps (%rsp), %xmm7 # 16-byte Reload
@@ -1465,25 +1463,24 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; AVX1-NEXT: .LBB13_1: # %inner_loop
; AVX1-NEXT: # =>This Inner Loop Header: Depth=1
; AVX1-NEXT: movq %r9, %r10
-; AVX1-NEXT: shrq $6, %r10
-; AVX1-NEXT: movq (%rdx,%r10,8), %r10
-; AVX1-NEXT: btq %r9, %r10
-; AVX1-NEXT: leaq 1(%r9), %r9
+; AVX1-NEXT: shrq $6, %r9
+; AVX1-NEXT: movq (%rdx,%r9,8), %r11
+; AVX1-NEXT: leaq 1(%r10), %r9
+; AVX1-NEXT: btq %r10, %r11
; AVX1-NEXT: jae .LBB13_1
; AVX1-NEXT: # %bb.2: # %loop_end
; AVX1-NEXT: # in Loop: Header=BB13_1 Depth=1
-; AVX1-NEXT: leaq 1(%r8), %r10
-; AVX1-NEXT: vcvtsi2sd %r10, %xmm6, %xmm4
+; AVX1-NEXT: leaq 1(%r8), %r11
+; AVX1-NEXT: vcvtsi2sd %r11, %xmm6, %xmm4
; AVX1-NEXT: vsubsd %xmm4, %xmm2, %xmm4
; AVX1-NEXT: vmulsd %xmm3, %xmm4, %xmm4
-; AVX1-NEXT: leaq -1(%r9), %r11
-; AVX1-NEXT: vcvtsi2sd %r11, %xmm6, %xmm5
+; AVX1-NEXT: vcvtsi2sd %r10, %xmm6, %xmm5
; AVX1-NEXT: vmulsd %xmm1, %xmm5, %xmm5
; AVX1-NEXT: vaddsd %xmm5, %xmm4, %xmm4
; AVX1-NEXT: vdivsd %xmm0, %xmm4, %xmm4
; AVX1-NEXT: vmovsd %xmm4, -8(%rcx,%r8,8)
-; AVX1-NEXT: movq %r10, %r8
-; AVX1-NEXT: cmpq %r10, %rax
+; AVX1-NEXT: movq %r11, %r8
+; AVX1-NEXT: cmpq %r11, %rax
; AVX1-NEXT: jge .LBB13_1
; AVX1-NEXT: # %bb.3: # %loopdone
; AVX1-NEXT: vmovaps (%rsp), %xmm7 # 16-byte Reload
@@ -1547,25 +1544,24 @@ define dso_local void @loopclearance2(ptr nocapture %y, ptr %x, double %c1, doub
; AVX512VL-NEXT: .LBB13_1: # %inner_loop
; AVX512VL-NEXT: # =>This Inner Loop Header: Depth=1
; AVX512VL-NEXT: movq %r9, %r10
-; AVX512VL-NEXT: shrq $6, %r10
-; AVX512VL-NEXT: movq (%rdx,%r10,8), %r10
-; AVX512VL-NEXT: btq %r9, %r10
-; AVX512VL-NEXT: leaq 1(%r9), %r9
+; AVX512VL-NEXT: shrq $6, %r9
+; AVX512VL-NEXT: movq (%rdx,%r9,8), %r11
+; AVX512VL-NEXT: leaq 1(%r10), %r9
+; AVX512VL-NEXT: btq %r10, %r11
; AVX512VL-NEXT: jae .LBB13_1
; AVX512VL-NEXT: # %bb.2: # %loop_end
; AVX512VL-NEXT: # in Loop: Header=BB13_1 Depth=1
-; AVX512VL-NEXT: leaq 1(%r8), %r10
-; AVX512VL-NEXT: vcvtsi2sd %r10, %xmm6, %xmm4
+; AVX512VL-NEXT: leaq 1(%r8), %r11
+; AVX512VL-NEXT: vcvtsi2sd %r11, %xmm6, %xmm4
; AVX512VL-NEXT: vsubsd %xmm4, %xmm2, %xmm4
; AVX512VL-NEXT: vmulsd %xmm3, %xmm4, %xmm4
-; AVX512VL-NEXT: leaq -1(%r9), %r11
-; AVX512VL-NEXT: vcvtsi2sd %r11, %xmm6, %xmm5
+; AVX512VL-NEXT: vcvtsi2sd %r10, %xmm6, %xmm5
; AVX512VL-NEXT: vmulsd %xmm1, %xmm5, %xmm5
; AVX512VL-NEXT: vaddsd %xmm5, %xmm4, %xmm4
; AVX512VL-NEXT: vdivsd %xmm0, %xmm4, %xmm4
; AVX512VL-NEXT: vmovsd %xmm4, -8(%rcx,%r8,8)
-; AVX512VL-NEXT: movq %r10, %r8
-; AVX512VL-NEXT: cmpq %r10, %rax
+; AVX512VL-NEXT: movq %r11, %r8
+; AVX512VL-NEXT: cmpq %r11, %rax
; AVX512VL-NEXT: jge .LBB13_1
; AVX512VL-NEXT: # %bb.3: # %loopdone
; AVX512VL-NEXT: vmovaps (%rsp), %xmm7 # 16-byte Reload
diff --git a/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll b/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll
index 9a59e5a8ccabb..07cb1158950f3 100644
--- a/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll
+++ b/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll
@@ -7,10 +7,9 @@ define void @test_simplify_scev_during_expansion_flags(i64 %start) {
; CHECK-LABEL: define void @test_simplify_scev_during_expansion_flags(
; CHECK-SAME: i64 [[START:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[START_NEG:%.*]] = sub i64 0, [[START]]
+; CHECK-NEXT: [[START_NEG:%.*]] = sub nsw i64 0, [[START]]
; CHECK-NEXT: [[START_MUL:%.*]] = ashr exact i64 [[START_NEG]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = shl nsw i64 [[START_MUL]], 2
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 404, [[TMP0]]
+; CHECK-NEXT: [[TMP1:%.*]] = sub i64 404, [[START_NEG]]
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 null, i8 0, i64 [[TMP1]], i1 false)
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
index 7ae78ae6a1fd4..6a70fe48de7e2 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/2011-11-29-postincphi.ll
@@ -20,17 +20,16 @@ define i64 @sqlite3DropTriggerPtr() nounwind {
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB0_1: # %bb1
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movq %rbx, %rcx
; CHECK-NEXT: testb %al, %al
-; CHECK-NEXT: je .LBB0_4
+; CHECK-NEXT: je .LBB0_3
; CHECK-NEXT: # %bb.2: # %bb4
; CHECK-NEXT: # in Loop: Header=BB0_1 Depth=1
-; CHECK-NEXT: incq %rbx
+; CHECK-NEXT: leaq 1(%rcx), %rbx
; CHECK-NEXT: testb %al, %al
; CHECK-NEXT: jne .LBB0_1
-; CHECK-NEXT: # %bb.3: # %bb8split
-; CHECK-NEXT: decq %rbx
-; CHECK-NEXT: .LBB0_4: # %bb8
-; CHECK-NEXT: movq %rbx, %rax
+; CHECK-NEXT: .LBB0_3: # %bb8
+; CHECK-NEXT: movq %rcx, %rax
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
bb:
diff --git a/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll b/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
index 89523b4487d49..b59ca63abfa41 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/X86/pr46943.ll
@@ -19,8 +19,7 @@ define i8 @drop_nuw() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 0
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], -1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
@@ -49,8 +48,7 @@ define i8 @drop_nsw() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], 127
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], 1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
@@ -79,8 +77,7 @@ define i8 @already_postinc() {
; CHECK-NEXT: [[CMP:%.*]] = icmp eq i8 [[IV_NEXT]], -1
; CHECK-NEXT: br i1 [[CMP]], label [[EXIT:%.*]], label [[LOOP]]
; CHECK: exit:
-; CHECK-NEXT: [[TMP0:%.*]] = add i8 [[IV_NEXT]], -1
-; CHECK-NEXT: ret i8 [[TMP0]]
+; CHECK-NEXT: ret i8 [[IV]]
;
entry:
br label %loop
diff --git a/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll b/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
index 9c3698a740992..a46d75ae537ab 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/depth-limit-overrun.ll
@@ -22,10 +22,9 @@ define void @test(i32 %A, i32 %B, i32 %C) {
; DEFAULT: preheader:
; DEFAULT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
; DEFAULT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
-; DEFAULT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
-; DEFAULT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
+; DEFAULT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[PHI2]]
; DEFAULT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
+; DEFAULT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[PHI2]]
; DEFAULT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
; DEFAULT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
; DEFAULT-NEXT: br label [[INNER_LOOP:%.*]]
@@ -87,10 +86,9 @@ define void @test(i32 %A, i32 %B, i32 %C) {
; LIMIT: preheader:
; LIMIT-NEXT: [[I15:%.*]] = shl i32 [[B]], 1
; LIMIT-NEXT: [[TMP1:%.*]] = mul i32 [[PHI2]], -1
-; LIMIT-NEXT: [[TMP2:%.*]] = mul i32 [[TMP1]], -1
-; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[TMP2]]
+; LIMIT-NEXT: [[TMP3:%.*]] = sub i32 [[PHI4]], [[PHI2]]
; LIMIT-NEXT: [[TMP4:%.*]] = add i32 [[B]], [[PHI4]]
-; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[TMP2]]
+; LIMIT-NEXT: [[TMP5:%.*]] = sub i32 [[TMP4]], [[PHI2]]
; LIMIT-NEXT: [[TMP6:%.*]] = sub i32 14, [[TMP5]]
; LIMIT-NEXT: [[TMP7:%.*]] = add i32 [[TMP0]], [[PHI2]]
; LIMIT-NEXT: br label [[INNER_LOOP:%.*]]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll b/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
index 74d861cf5168c..d66905ae26f14 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/lsr-rewrite-to-add-one.ll
@@ -20,8 +20,7 @@ define i32 @test(i1 %c.1, ptr %src) {
; CHECK-NEXT: [[OR:%.*]] = or i1 [[P]], [[T]]
; CHECK-NEXT: [[ZEXT_OR:%.*]] = zext i1 [[OR]] to i32
; CHECK-NEXT: [[LSR_IV_NEXT]] = add nuw i32 [[LSR_IV]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[LSR_IV_NEXT]], -1
-; CHECK-NEXT: [[LOOP_HEADER_TERMCOND:%.*]] = icmp sgt i32 [[TMP0]], -1050
+; CHECK-NEXT: [[LOOP_HEADER_TERMCOND:%.*]] = icmp sgt i32 [[LSR_IV]], -1050
; CHECK-NEXT: br i1 [[LOOP_HEADER_TERMCOND]], label [[LOOP_HEADER]], label [[EXIT:%.*]]
; CHECK: exit:
; CHECK-NEXT: [[ZEXT_OR_LCSSA:%.*]] = phi i32 [ [[ZEXT_OR]], [[LOOP_LATCH]] ]
diff --git a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
index 502042eaf9b9c..007d84f9120e3 100644
--- a/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
+++ b/llvm/test/Transforms/LoopStrengthReduce/wrong-hoisting-iv.ll
@@ -16,9 +16,7 @@ define void @test1() {
; CHECK-NEXT: [[VAL4:%.*]] = sub i32 [[VAL]], [[VAL3]]
; CHECK-NEXT: [[VAL5:%.*]] = ashr i32 undef, undef
; CHECK-NEXT: [[VAL6:%.*]] = sub i32 [[VAL4]], [[VAL5]]
-; CHECK-NEXT: [[TMP0:%.*]] = mul i32 [[VAL]], 7
-; CHECK-NEXT: [[TMP1:%.*]] = mul i32 [[VAL3]], 7
-; CHECK-NEXT: [[TMP2:%.*]] = sub i32 [[TMP0]], [[TMP1]]
+; CHECK-NEXT: [[TMP2:%.*]] = mul i32 [[VAL]], 7
; CHECK-NEXT: [[TMP3:%.*]] = mul i32 [[VAL5]], 7
; CHECK-NEXT: [[TMP4:%.*]] = sub i32 [[TMP2]], [[TMP3]]
; CHECK-NEXT: [[TMP5:%.*]] = shl i32 [[VAL6]], 3
@@ -58,10 +56,8 @@ define void @test1() {
; CHECK: bb20.bb15splitsplitsplit_crit_edge:
; CHECK-NEXT: [[TMP14:%.*]] = mul i32 [[VAL]], 3
; CHECK-NEXT: [[TMP15:%.*]] = mul i32 [[VAL1]], [[VAL2]]
-; CHECK-NEXT: [[TMP16:%.*]] = mul i32 [[TMP15]], 3
-; CHECK-NEXT: [[TMP17:%.*]] = sub i32 [[TMP14]], [[TMP16]]
; CHECK-NEXT: [[TMP18:%.*]] = mul i32 [[VAL5]], 3
-; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP17]], [[TMP18]]
+; CHECK-NEXT: [[TMP19:%.*]] = sub i32 [[TMP14]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[LSR_IV1]]
; CHECK-NEXT: br label [[BB15SPLITSPLITSPLIT]]
; CHECK: bb15splitsplitsplit:
@@ -82,10 +78,8 @@ define void @test1() {
; CHECK: bb26.bb15split_crit_edge:
; CHECK-NEXT: [[TMP28:%.*]] = mul i32 [[VAL]], 5
; CHECK-NEXT: [[TMP29:%.*]] = mul i32 [[VAL1]], [[VAL2]]
-; CHECK-NEXT: [[TMP30:%.*]] = mul i32 [[TMP29]], 5
-; CHECK-NEXT: [[TMP31:%.*]] = sub i32 [[TMP28]], [[TMP30]]
; CHECK-NEXT: [[TMP32:%.*]] = mul i32 [[VAL5]], 5
-; CHECK-NEXT: [[TMP33:%.*]] = sub i32 [[TMP31]], [[TMP32]]
+; CHECK-NEXT: [[TMP33:%.*]] = sub i32 [[TMP28]], [[TMP32]]
; CHECK-NEXT: [[TMP34:%.*]] = add i32 [[TMP33]], [[LSR_IV1]]
; CHECK-NEXT: br label [[BB15SPLIT]]
; CHECK: bb15split:
>From 74e6c4abfe977e72d818117062b8b0b9d7100b03 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Mon, 18 Aug 2025 16:18:37 +0100
Subject: [PATCH 2/4] !fixup add and use CreateBinOpNoWrapFlags.
---
llvm/include/llvm/IR/IRBuilder.h | 13 +++++++++++++
.../Transforms/Utils/ScalarEvolutionExpander.cpp | 12 +++---------
.../LoopIdiom/expand-scev-expand-simplifications.ll | 2 +-
3 files changed, 17 insertions(+), 10 deletions(-)
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index 4ed3d73c4a057..c84496c6a7104 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1748,6 +1748,19 @@ class IRBuilderBase {
return Insert(BinOp, Name);
}
+ Value *CreateBinOpNoWrapFlags(Instruction::BinaryOps Opc, Value *LHS,
+ Value *RHS, bool IsNUW, bool IsNSW,
+ const Twine &Name = "") {
+ if (Value *V = Folder.FoldBinOp(Opc, LHS, RHS))
+ return V;
+ Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS);
+ if (IsNUW)
+ BinOp->setHasNoUnsignedWrap(IsNUW);
+ if (IsNSW)
+ BinOp->setHasNoSignedWrap(IsNSW);
+ return Insert(BinOp, Name);
+ }
+
Value *CreateLogicalAnd(Value *Cond1, Value *Cond2, const Twine &Name = "",
Instruction *MDFrom = nullptr) {
assert(Cond2->getType()->isIntOrIntVectorTy(1));
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index bbe4d16e759f6..e2953cc8e3a26 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -337,15 +337,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
// If we haven't found this binop, insert it.
- Value *Op = Builder.CreateBinOp(Opcode, LHS, RHS);
- if (auto *BO = dyn_cast<Instruction>(Op)) {
- BO->setDebugLoc(Loc);
- if (Flags & SCEV::FlagNUW)
- BO->setHasNoUnsignedWrap();
- if (Flags & SCEV::FlagNSW)
- BO->setHasNoSignedWrap();
- }
-
+ Builder.SetCurrentDebugLocation(Loc);
+ Value *Op = Builder.CreateBinOpNoWrapFlags(
+ Opcode, LHS, RHS, Flags & SCEV::FlagNUW, Flags & SCEV::FlagNSW);
return Op;
}
diff --git a/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll b/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll
index 07cb1158950f3..e1cbf8d171ce2 100644
--- a/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll
+++ b/llvm/test/Transforms/LoopIdiom/expand-scev-expand-simplifications.ll
@@ -7,7 +7,7 @@ define void @test_simplify_scev_during_expansion_flags(i64 %start) {
; CHECK-LABEL: define void @test_simplify_scev_during_expansion_flags(
; CHECK-SAME: i64 [[START:%.*]]) {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: [[START_NEG:%.*]] = sub nsw i64 0, [[START]]
+; CHECK-NEXT: [[START_NEG:%.*]] = sub i64 0, [[START]]
; CHECK-NEXT: [[START_MUL:%.*]] = ashr exact i64 [[START_NEG]], 2
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 404, [[START_NEG]]
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 4 null, i8 0, i64 [[TMP1]], i1 false)
>From 172aa36a2e2a883941a600dafa6519433d8b994d Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 20 Aug 2025 18:23:49 +0100
Subject: [PATCH 3/4] !fixup use Folder.FoldNoWrapBinOp, rename to
CreateNoWrapBinOp.
---
llvm/include/llvm/IR/IRBuilder.h | 7 +++----
llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp | 4 ++--
2 files changed, 5 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/IR/IRBuilder.h b/llvm/include/llvm/IR/IRBuilder.h
index c84496c6a7104..5fd934bba2590 100644
--- a/llvm/include/llvm/IR/IRBuilder.h
+++ b/llvm/include/llvm/IR/IRBuilder.h
@@ -1748,10 +1748,9 @@ class IRBuilderBase {
return Insert(BinOp, Name);
}
- Value *CreateBinOpNoWrapFlags(Instruction::BinaryOps Opc, Value *LHS,
- Value *RHS, bool IsNUW, bool IsNSW,
- const Twine &Name = "") {
- if (Value *V = Folder.FoldBinOp(Opc, LHS, RHS))
+ Value *CreateNoWrapBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS,
+ bool IsNUW, bool IsNSW, const Twine &Name = "") {
+ if (Value *V = Folder.FoldNoWrapBinOp(Opc, LHS, RHS, IsNUW, IsNSW))
return V;
Instruction *BinOp = BinaryOperator::Create(Opc, LHS, RHS);
if (IsNUW)
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index e2953cc8e3a26..a806a085fe1af 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -338,8 +338,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
// If we haven't found this binop, insert it.
Builder.SetCurrentDebugLocation(Loc);
- Value *Op = Builder.CreateBinOpNoWrapFlags(
- Opcode, LHS, RHS, Flags & SCEV::FlagNUW, Flags & SCEV::FlagNSW);
+ Value *Op = Builder.CreateNoWrapBinOp(Opcode, LHS, RHS, Flags & SCEV::FlagNUW,
+ Flags & SCEV::FlagNSW);
return Op;
}
>From 75d9db5d7f46aeb8e5f7dabaf2f349af03c68392 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 1 Apr 2026 14:18:48 +0100
Subject: [PATCH 4/4] !fixup update tests, fix polly test failures
---
.../Utils/ScalarEvolutionExpander.cpp | 4 +-
.../AArch64/epilog-vectorization-factors.ll | 47 +++++--------------
.../epilog-vectorization-reductions.ll | 46 +++++-------------
.../version-stride-with-integer-casts.ll | 2 -
polly/lib/CodeGen/IslNodeBuilder.cpp | 18 +++++--
5 files changed, 43 insertions(+), 74 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
index a806a085fe1af..9d248ad7259e3 100644
--- a/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
+++ b/llvm/lib/Transforms/Utils/ScalarEvolutionExpander.cpp
@@ -1336,7 +1336,9 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
S->getNoWrapFlags(SCEV::FlagNW)));
BasicBlock::iterator NewInsertPt =
- findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint());
+ isa<Instruction>(V) ? findInsertPointAfter(cast<Instruction>(V),
+ &*Builder.GetInsertPoint())
+ : Builder.GetInsertPoint();
V = expand(SE.getTruncateExpr(SE.getUnknown(V), Ty), NewInsertPt);
return V;
}
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
index 28684fe527016..b11d8cd0c57d7 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/epilog-vectorization-factors.ll
@@ -430,26 +430,11 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: [[PTR_START:%.*]] = inttoptr i64 [[X:%.*]] to ptr
; CHECK-NEXT: [[ADD:%.*]] = add i64 [[X]], 40
; CHECK-NEXT: [[PTR_END:%.*]] = inttoptr i64 [[ADD]] to ptr
-; CHECK-NEXT: [[TMP0:%.*]] = sub i64 [[ADD]], [[X]]
-; CHECK-NEXT: [[TMP1:%.*]] = lshr i64 [[TMP0]], 2
-; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
-; CHECK: vector.scevcheck:
-; CHECK-NEXT: [[TMP3:%.*]] = trunc i64 [[ADD]] to i2
-; CHECK-NEXT: [[TMP4:%.*]] = trunc i64 [[X]] to i2
-; CHECK-NEXT: [[TMP5:%.*]] = sub i2 [[TMP3]], [[TMP4]]
-; CHECK-NEXT: [[TMP6:%.*]] = zext i2 [[TMP5]] to i64
-; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP6]], 0
-; CHECK-NEXT: br i1 [[IDENT_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VECTOR_MAIN_LOOP_ITER_CHECK:%.*]]
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.main.loop.iter.check:
-; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP2]], 16
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 true, label [[VEC_EPILOG_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 16
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP12:%.*]] = mul i64 [[N_VEC]], 4
-; CHECK-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP12]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
@@ -463,20 +448,15 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP9]], align 4
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[TMP10]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
; CHECK: middle.block:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
+; CHECK-NEXT: br i1 false, label [[EXIT:%.*]], label [[VEC_EPILOG_ITER_CHECK:%.*]]
; CHECK: vec.epilog.iter.check:
-; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]]
+; CHECK-NEXT: br i1 false, label [[VEC_EPILOG_SCALAR_PH]], label [[VEC_EPILOG_PH]], !prof [[PROF11]]
; CHECK: vec.epilog.ph:
-; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 [[TMP2]], 4
-; CHECK-NEXT: [[N_VEC2:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF1]]
-; CHECK-NEXT: [[TMP13:%.*]] = mul i64 [[N_VEC2]], 4
-; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[TMP13]]
+; CHECK-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 0, [[VEC_EPILOG_ITER_CHECK]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 32
; CHECK-NEXT: br label [[VEC_EPILOG_VECTOR_BODY:%.*]]
; CHECK: vec.epilog.vector.body:
; CHECK-NEXT: [[INDEX3:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], [[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT5:%.*]], [[VEC_EPILOG_VECTOR_BODY]] ]
@@ -484,13 +464,12 @@ define void @trip_count_based_on_ptrtoint(i64 %x) "target-cpu"="apple-m1" {
; CHECK-NEXT: [[NEXT_GEP4:%.*]] = getelementptr i8, ptr [[PTR_START]], i64 [[OFFSET_IDX]]
; CHECK-NEXT: store <4 x i32> zeroinitializer, ptr [[NEXT_GEP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT5]] = add nuw i64 [[INDEX3]], 4
-; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT5]], [[N_VEC2]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT5]], 8
+; CHECK-NEXT: br i1 [[TMP11]], label [[VEC_EPILOG_MIDDLE_BLOCK:%.*]], label [[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: vec.epilog.middle.block:
-; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC2]]
-; CHECK-NEXT: br i1 [[CMP_N6]], label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: br i1 false, label [[EXIT]], label [[VEC_EPILOG_SCALAR_PH]]
; CHECK: vec.epilog.scalar.ph:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP14]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[IND_END]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[VECTOR_SCEVCHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi ptr [ [[TMP6]], [[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP0]], [[VEC_EPILOG_ITER_CHECK]] ], [ [[PTR_START]], [[ITER_CHECK:%.*]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi ptr [ [[BC_RESUME_VAL]], [[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
index f08f7ac0fd4e2..504b38a47aea8 100644
--- a/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
+++ b/llvm/test/Transforms/LoopVectorize/epilog-vectorization-reductions.ll
@@ -1015,27 +1015,11 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
; CHECK-NEXT: [[END:%.*]] = inttoptr i64 [[ADD]] to ptr
; CHECK-NEXT: [[START:%.*]] = getelementptr i8, ptr [[BASE]], i64 24
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[ADD]], -48
-; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[PTRTOINT]]
-; CHECK-NEXT: [[TMP2:%.*]] = mul i64 [[TMP1]], 768614336404564651
-; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 [[TMP2]], 3
-; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP4]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
+; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
; CHECK: [[VECTOR_SCEVCHECK]]:
-; CHECK-NEXT: [[TMP5:%.*]] = trunc i64 [[ADD]] to i3
-; CHECK-NEXT: [[TMP6:%.*]] = trunc i64 [[PTRTOINT]] to i3
-; CHECK-NEXT: [[TMP7:%.*]] = sub i3 [[TMP5]], [[TMP6]]
-; CHECK-NEXT: [[TMP8:%.*]] = zext i3 [[TMP7]] to i64
-; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i64 [[TMP8]], 0
-; CHECK-NEXT: br i1 [[IDENT_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
-; CHECK: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
-; CHECK-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 [[TMP4]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP4]], 4
-; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF]]
-; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[N_VEC]], 24
-; CHECK-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP15]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[START]], i64 96
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
@@ -1047,21 +1031,16 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
; CHECK-NEXT: [[TMP11:%.*]] = zext <4 x i8> [[BROADCAST_SPLAT]] to <4 x i64>
; CHECK-NEXT: [[TMP12]] = add <4 x i64> [[TMP9]], [[TMP11]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 4
+; CHECK-NEXT: br i1 [[TMP6]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP12]])
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
+; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
; CHECK: [[VEC_EPILOG_ITER_CHECK]]:
-; CHECK-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], 4
-; CHECK-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
+; CHECK-NEXT: br i1 true, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF3]]
; CHECK: [[VEC_EPILOG_PH]]:
-; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP14]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
-; CHECK-NEXT: [[N_MOD_VF1:%.*]] = urem i64 [[TMP4]], 4
-; CHECK-NEXT: [[N_VEC2:%.*]] = sub i64 [[TMP4]], [[N_MOD_VF1]]
-; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[N_VEC2]], 24
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr i8, ptr [[START]], i64 [[TMP16]]
+; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP14]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[START]], i64 96
; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i64> zeroinitializer, i64 [[BC_MERGE_RDX]], i32 0
; CHECK-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
; CHECK: [[VEC_EPILOG_VECTOR_BODY]]:
@@ -1074,11 +1053,10 @@ define i64 @reduction_with_ptr_iv_inttoptr_exit_cond(ptr %base, ptr %src) {
; CHECK-NEXT: br label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]]
; CHECK: [[VEC_EPILOG_MIDDLE_BLOCK]]:
; CHECK-NEXT: [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP22]])
-; CHECK-NEXT: [[CMP_N6:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC2]]
-; CHECK-NEXT: br i1 [[CMP_N6]], label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
+; CHECK-NEXT: br i1 true, label %[[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
; CHECK: [[VEC_EPILOG_SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[TMP17]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP24]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[VECTOR_SCEVCHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
-; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi i64 [ [[TMP23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_SCEVCHECK]] ], [ 0, %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL7:%.*]] = phi ptr [ [[TMP8]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP1]], %[[VEC_EPILOG_ITER_CHECK]] ], [ [[START]], %[[ITER_CHECK]] ]
+; CHECK-NEXT: [[BC_MERGE_RDX8:%.*]] = phi i64 [ [[TMP23]], %[[VEC_EPILOG_MIDDLE_BLOCK]] ], [ [[TMP14]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[ITER_CHECK]] ]
; CHECK-NEXT: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK-NEXT: [[IV_PTR:%.*]] = phi ptr [ [[BC_RESUME_VAL7]], %[[VEC_EPILOG_SCALAR_PH]] ], [ [[IV_PTR_NEXT:%.*]], %[[LOOP]] ]
diff --git a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
index 6cb76f86aeb5a..a9b15f7c206d2 100644
--- a/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
+++ b/llvm/test/Transforms/LoopVectorize/version-stride-with-integer-casts.ll
@@ -399,8 +399,6 @@ define void @zext_of_i1_stride(i1 %g, ptr %dst) mustprogress {
; CHECK-NEXT: entry:
; CHECK-NEXT: [[G_16:%.*]] = zext i1 [[G]] to i16
; CHECK-NEXT: [[G_64:%.*]] = zext i1 [[G]] to i64
-; CHECK-NEXT: [[TMP0:%.*]] = udiv i64 15, [[G_64]]
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
; CHECK-NEXT: br label [[VECTOR_SCEVCHECK:%.*]]
; CHECK: vector.scevcheck:
; CHECK-NEXT: [[IDENT_CHECK:%.*]] = icmp ne i1 [[G]], true
diff --git a/polly/lib/CodeGen/IslNodeBuilder.cpp b/polly/lib/CodeGen/IslNodeBuilder.cpp
index d620ac768abd6..229a77bbfb7fd 100644
--- a/polly/lib/CodeGen/IslNodeBuilder.cpp
+++ b/polly/lib/CodeGen/IslNodeBuilder.cpp
@@ -587,6 +587,8 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
ScalarEvolution *CallerSE = GenSE;
ValueMapT CallerGlobals = ValueMap;
IslExprBuilder::IDToValueTy IDToValueCopy = IDToValue;
+ MapVector<const Loop *, const SCEV *> OutsideLoopIterationsCopy =
+ OutsideLoopIterations;
// Get the analyses for the subfunction. ParallelLoopGenerator already create
// DominatorTree and LoopInfo for us.
@@ -648,6 +650,18 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
}
IDToValue[IteratorID] = IV;
+ // Also update OutsideLoopIterations to use values from the subfunction.
+ // SCEVExpander may fold identity operations (e.g. x+0 -> x), returning the
+ // original loop PHI instead of a new instruction. We need to remap these
+ // values through NewValues so GenSE (now SubSE) doesn't operate on values
+ // from the caller function.
+ for (auto &[L, S] : OutsideLoopIterations) {
+ if (auto *U = dyn_cast<SCEVUnknown>(S)) {
+ if (Value *NewVal = NewValues.lookup(U->getValue()))
+ OutsideLoopIterations[L] = GenSE->getUnknown(NewVal);
+ }
+ }
+
#ifndef NDEBUG
// Check whether the maps now exclusively refer to SubFn values.
for (auto &[OldVal, SubVal] : ValueMap) {
@@ -680,14 +694,12 @@ void IslNodeBuilder::createForParallel(__isl_take isl_ast_node *For) {
GenSE = CallerSE;
IDToValue = std::move(IDToValueCopy);
ValueMap = std::move(CallerGlobals);
+ OutsideLoopIterations = std::move(OutsideLoopIterationsCopy);
ExprBuilder.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE);
RegionGen.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE);
BlockGen.switchGeneratedFunc(CallerFn, CallerDT, CallerLI, CallerSE);
Builder.SetInsertPoint(AfterLoop);
- for (const Loop *L : Loops)
- OutsideLoopIterations.erase(L);
-
isl_ast_node_free(For);
isl_ast_expr_free(Iterator);
isl_id_free(IteratorID);
More information about the llvm-commits
mailing list