[llvm] [InstCombine] Extend ADD+GEP->GEP+GEP combine to disjoint or. (PR #76981)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 4 09:38:50 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
---
Patch is 282.66 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/76981.diff
20 Files Affected:
- (modified) llvm/lib/Transforms/InstCombine/InstructionCombining.cpp (+1-1)
- (modified) llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll (+60-60)
- (modified) llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll (+310-314)
- (modified) llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll (+13-15)
- (modified) llvm/test/Transforms/LoopVectorize/SystemZ/addressing.ll (+11-11)
- (modified) llvm/test/Transforms/LoopVectorize/X86/float-induction-x86.ll (+44-44)
- (modified) llvm/test/Transforms/LoopVectorize/X86/interleaving.ll (+58-58)
- (modified) llvm/test/Transforms/LoopVectorize/X86/parallel-loops.ll (+27-27)
- (modified) llvm/test/Transforms/LoopVectorize/X86/small-size.ll (+24-24)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-accesses-masked-group.ll (+21-27)
- (modified) llvm/test/Transforms/LoopVectorize/X86/x86-interleaved-store-accesses-with-gaps.ll (+4-8)
- (modified) llvm/test/Transforms/LoopVectorize/consecutive-ptr-uniforms.ll (+24-18)
- (modified) llvm/test/Transforms/LoopVectorize/float-induction.ll (+70-70)
- (modified) llvm/test/Transforms/LoopVectorize/forked-pointers.ll (+26-26)
- (modified) llvm/test/Transforms/LoopVectorize/induction.ll (+9-15)
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll (+69-73)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-cond.ll (+36-36)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-pred.ll (+52-52)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-inloop-uf4.ll (+60-60)
- (modified) llvm/test/Transforms/LoopVectorize/reduction-predselect.ll (+19-19)
``````````diff
diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index f3181dc14792c8..6345a62ad99bf2 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2527,7 +2527,7 @@ Instruction *InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// Try to replace ADD + GEP with GEP + GEP.
Value *Idx1, *Idx2;
if (match(GEP.getOperand(1),
- m_OneUse(m_Add(m_Value(Idx1), m_Value(Idx2))))) {
+ m_OneUse(m_AddLike(m_Value(Idx1), m_Value(Idx2))))) {
// %idx = add i64 %idx1, %idx2
// %gep = getelementptr i32, ptr %ptr, i64 %idx
// as:
diff --git a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
index 75222e5ee13716..74195c24578d2c 100644
--- a/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
+++ b/llvm/test/Transforms/LoopUnroll/runtime-multiexit-heuristic.ll
@@ -35,62 +35,62 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: for.exiting_block.1:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP5]], [[ADD]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
; CHECK: for.exiting_block.2:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP6]], i64 2
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP7]], [[ADD_1]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
; CHECK: for.exiting_block.3:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP8]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP9]], [[ADD_2]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
; CHECK: for.exiting_block.4:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr i32, ptr [[TMP10]], i64 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP11]], [[ADD_3]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
; CHECK: for.exiting_block.5:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr i32, ptr [[TMP12]], i64 5
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP13]], [[ADD_4]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
; CHECK: for.exiting_block.6:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr i32, ptr [[TMP14]], i64 6
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP15]], [[ADD_5]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
; CHECK: for.exiting_block.7:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr i32, ptr [[TMP16]], i64 7
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP17]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
@@ -115,8 +115,8 @@ define i32 @test1(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
; CHECK: latch.epil:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP18]], [[SUM_02_EPIL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
@@ -763,62 +763,62 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[A:%.*]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], [[SUM_02]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT:%.*]] = or disjoint i64 [[INDVARS_IV]], 1
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_1:%.*]]
; CHECK: for.exiting_block.1:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_1:%.*]]
; CHECK: latch.1:
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT]]
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP4]], [[ADD]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_1:%.*]] = or disjoint i64 [[INDVARS_IV]], 2
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr i32, ptr [[TMP4]], i64 1
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[ADD_1:%.*]] = add nsw i32 [[TMP5]], [[ADD]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_2:%.*]]
; CHECK: for.exiting_block.2:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_2:%.*]]
; CHECK: latch.2:
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_1]]
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP5]], [[ADD_1]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_2:%.*]] = or disjoint i64 [[INDVARS_IV]], 3
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr i32, ptr [[TMP6]], i64 2
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[ADD_2:%.*]] = add nsw i32 [[TMP7]], [[ADD_1]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_3:%.*]]
; CHECK: for.exiting_block.3:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_3:%.*]]
; CHECK: latch.3:
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_2]]
-; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP6]], [[ADD_2]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_3:%.*]] = or disjoint i64 [[INDVARS_IV]], 4
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr i32, ptr [[TMP8]], i64 3
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[ADD_3:%.*]] = add nsw i32 [[TMP9]], [[ADD_2]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_4:%.*]]
; CHECK: for.exiting_block.4:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_4:%.*]]
; CHECK: latch.4:
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_3]]
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP7]], [[ADD_3]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_4:%.*]] = or disjoint i64 [[INDVARS_IV]], 5
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr i32, ptr [[TMP10]], i64 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[ADD_4:%.*]] = add nsw i32 [[TMP11]], [[ADD_3]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_5:%.*]]
; CHECK: for.exiting_block.5:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_5:%.*]]
; CHECK: latch.5:
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_4]]
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP8]], [[ADD_4]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_5:%.*]] = or disjoint i64 [[INDVARS_IV]], 6
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr i32, ptr [[TMP12]], i64 5
+; CHECK-NEXT: [[TMP13:%.*]] = load i32, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[ADD_5:%.*]] = add nsw i32 [[TMP13]], [[ADD_4]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_6:%.*]]
; CHECK: for.exiting_block.6:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_6:%.*]]
; CHECK: latch.6:
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_5]]
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP9]], [[ADD_5]]
-; CHECK-NEXT: [[INDVARS_IV_NEXT_6:%.*]] = or disjoint i64 [[INDVARS_IV]], 7
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr i32, ptr [[TMP14]], i64 6
+; CHECK-NEXT: [[TMP15:%.*]] = load i32, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[ADD_6:%.*]] = add nsw i32 [[TMP15]], [[ADD_5]]
; CHECK-NEXT: br label [[FOR_EXITING_BLOCK_7:%.*]]
; CHECK: for.exiting_block.7:
; CHECK-NEXT: br i1 false, label [[OTHEREXIT_LOOPEXIT]], label [[LATCH_7]]
; CHECK: latch.7:
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_NEXT_6]]
-; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP10]], [[ADD_6]]
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[A]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr i32, ptr [[TMP16]], i64 7
+; CHECK-NEXT: [[TMP17:%.*]] = load i32, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[ADD_7]] = add nsw i32 [[TMP17]], [[ADD_6]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_7]] = add i64 [[INDVARS_IV]], 8
; CHECK-NEXT: [[NITER_NEXT_7]] = add i64 [[NITER]], 8
; CHECK-NEXT: [[NITER_NCMP_7:%.*]] = icmp eq i64 [[NITER_NEXT_7]], [[UNROLL_ITER]]
@@ -843,8 +843,8 @@ define i32 @test5(ptr nocapture %a, i64 %n) {
; CHECK-NEXT: br i1 [[CMP_EPIL]], label [[OTHEREXIT_LOOPEXIT3:%.*]], label [[LATCH_EPIL]]
; CHECK: latch.epil:
; CHECK-NEXT: [[ARRAYIDX_EPIL:%.*]] = getelementptr inbounds i32, ptr [[A]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
-; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP11]], [[SUM_02_EPIL]]
+; CHECK-NEXT: [[TMP18:%.*]] = load i32, ptr [[ARRAYIDX_EPIL]], align 4
+; CHECK-NEXT: [[ADD_EPIL]] = add nsw i32 [[TMP18]], [[SUM_02_EPIL]]
; CHECK-NEXT: [[INDVARS_IV_NEXT_EPIL]] = add i64 [[INDVARS_IV_EPIL]], 1
; CHECK-NEXT: [[EPIL_ITER_NEXT]] = add i64 [[EPIL_ITER]], 1
; CHECK-NEXT: [[EPIL_ITER_CMP_NOT:%.*]] = icmp eq i64 [[EPIL_ITER_NEXT]], [[XTRAITER]]
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
index f600a0d5877d33..bed4b83edc57fa 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/sve-interleaved-accesses.ll
@@ -26,8 +26,8 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP9:%.*]] = shl nuw nsw i64 [[TMP8]], 2
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[C:%.*]], i64 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[D:%.*]], i64 0
@@ -36,19 +36,19 @@ define void @test_array_load2_store2(i32 %C, i32 %D) #1 {
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 1
-; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]]
-; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP0]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds [1024 x i32], ptr @AB, i64 0, i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <vscale x 8 x i32>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[STRIDED_VEC:%.*]] = call { <vscale x 4 x i32>, <vscale x 4 x i32> } @llvm.experimental.vector.deinterleave2.nxv8i32(<vscale x 8 x i32> [[WIDE_VEC]])
-; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
-; CHECK-NEXT: [[TMP2:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
-; CHECK-NEXT: [[TMP3:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
-; CHECK-NEXT: [[TMP4:%.*]] = add nsw <vscale x 4 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
-; CHECK-NEXT: [[TMP5:%.*]] = mul nsw <vscale x 4 x i32> [[TMP2]], [[BROADCAST_SPLAT2]]
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP3]]
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[TMP6]], i64 -1
-; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP4]], <vscale x 4 x i32> [[TMP5]])
-; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
+; CHECK-NEXT: [[TMP3:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 0
+; CHECK-NEXT: [[TMP4:%.*]] = extractvalue { <vscale x 4 x i32>, <vscale x 4 x i32> } [[STRIDED_VEC]], 1
+; CHECK-NEXT: [[TMP5:%.*]] = or disjoint i64 [[OFFSET_IDX]], 1
+; CHECK-NEXT: [[TMP6:%.*]] = add nsw <vscale x 4 x i32> [[TMP3]], [[BROADCAST_SPLAT]]
+; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <vscale x 4 x i32> [[TMP4]], [[BROADCAST_SPLAT2]]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1024 x i32], ptr @CD, i64 0, i64 [[TMP5]]
+; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, ptr [[TMP8]], i64 -1
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = call <vscale x 8 x i32> @llvm.experimental.vector.interleave2.nxv8i32(<vscale x 4 x i32> [[TMP6]], <vscale x 4 x i32> [[TMP7]])
+; CHECK-NEXT: store <vscale x 8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP9]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP1]]
; CHECK-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
; CHECK-NEXT: br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
@@ -106,13 +106,13 @@ define void @test_array_load2_i16_store2(i32 %C, i32 %D) #1 {
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
-; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP15:%.*]] = shl nuw nsw i64 [[TMP14]], 2
-; CHECK-NEXT: [[TMP0:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
-; CHECK-NEXT: [[TMP1:%.*]] = shl <vscale x 4 x i64> [[TMP0]], shufflevector (<vscale x 4 x i64> insertelement (<vscale x 4 x i64> poison, i64 1, i64 0), <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer)
-; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
-; CHECK-NEXT: [[TMP3:%.*]] = shl nuw nsw i64 [[TMP2]], 3
-; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP3]], i64 0
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = shl nuw nsw i64 [[TMP0]], 2
+; CHECK-NEXT: [[TMP2:%.*]] = call <vscale x 4 x i64> @llvm.experimental.stepvector.nxv4i64()
+; CHECK-NEXT: [[TMP3:%....
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/76981
More information about the llvm-commits
mailing list