[llvm] [InstCombine] Preserve metadata from orig load in select fold. (PR #115605)
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 16 03:43:56 PST 2025
https://github.com/fhahn updated https://github.com/llvm/llvm-project/pull/115605
>From 6b2ba3f6efc7a6cf1281f98215ed4e42a82e2fbf Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Wed, 15 Jan 2025 21:44:46 +0000
Subject: [PATCH 1/3] [IR] Provide array with poison-generating metadata IDs.
---
llvm/include/llvm/IR/Instruction.h | 3 +++
llvm/lib/IR/Instruction.cpp | 10 ++++------
2 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/llvm/include/llvm/IR/Instruction.h b/llvm/include/llvm/IR/Instruction.h
index aa480aa8d98636..3a87db78bde779 100644
--- a/llvm/include/llvm/IR/Instruction.h
+++ b/llvm/include/llvm/IR/Instruction.h
@@ -503,6 +503,9 @@ class Instruction : public User,
/// Determine whether the the nneg flag is set.
bool hasNonNeg() const LLVM_READONLY;
+ constexpr static const unsigned PoisonGeneratingMetadataIDs[] = {
+ LLVMContext::MD_range, LLVMContext::MD_nonnull, LLVMContext::MD_align};
+
/// Return true if this operator has flags which may cause this instruction
/// to evaluate to poison despite having non-poison inputs.
bool hasPoisonGeneratingFlags() const LLVM_READONLY;
diff --git a/llvm/lib/IR/Instruction.cpp b/llvm/lib/IR/Instruction.cpp
index 147cd84125c8d1..12aefd8b1e96c1 100644
--- a/llvm/lib/IR/Instruction.cpp
+++ b/llvm/lib/IR/Instruction.cpp
@@ -458,9 +458,8 @@ void Instruction::dropPoisonGeneratingFlags() {
}
bool Instruction::hasPoisonGeneratingMetadata() const {
- return hasMetadata(LLVMContext::MD_range) ||
- hasMetadata(LLVMContext::MD_nonnull) ||
- hasMetadata(LLVMContext::MD_align);
+ return any_of(PoisonGeneratingMetadataIDs,
+ [this](unsigned ID) { return hasMetadata(ID); });
}
bool Instruction::hasNonDebugLocLoopMetadata() const {
@@ -487,9 +486,8 @@ bool Instruction::hasNonDebugLocLoopMetadata() const {
}
void Instruction::dropPoisonGeneratingMetadata() {
- eraseMetadata(LLVMContext::MD_range);
- eraseMetadata(LLVMContext::MD_nonnull);
- eraseMetadata(LLVMContext::MD_align);
+ for (unsigned ID : PoisonGeneratingMetadataIDs)
+ eraseMetadata(ID);
}
bool Instruction::hasPoisonGeneratingReturnAttributes() const {
>From ca701a1a9e8cdd10a69dd0f6cd26d4587f3861d5 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Sat, 9 Nov 2024 11:19:56 +0000
Subject: [PATCH 2/3] [InstCombine] Preserve metadata from orig load in select
fold.
When replacing load with a select on the address with a select and 2
loads of the values, copy the metadata from the original load to the
newly created loads, which are placed at the same place as the original
loads.
One of the loads will load from an address that won't be accessed
originally, but the poison result (if metadata doesn't doesn't hold) will
be discarded by the newly created select. We cannot copy any metadata
that would cause immediate UB, so !noundef and !invariant.load are
skipped.
Fixes https://github.com/llvm/llvm-project/issues/115595.
---
.../InstCombineLoadStoreAlloca.cpp | 19 +++++++++++++++
.../InstCombine/loadstore-metadata.ll | 8 +++----
.../X86/preserve-access-group.ll | 23 +++++++++----------
3 files changed, 34 insertions(+), 16 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index f87a4a58470402..cf40c5c02a6bba 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1060,6 +1060,25 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
V2->setAlignment(Alignment);
V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
+ // Can copy any metadata that cannot trigger UB. In particular do not
+ // copy !noundef or !invariant.load.
+ unsigned SafeMDKinds[] = {LLVMContext::MD_dbg,
+ LLVMContext::MD_tbaa,
+ LLVMContext::MD_prof,
+ LLVMContext::MD_fpmath,
+ LLVMContext::MD_tbaa_struct,
+ LLVMContext::MD_alias_scope,
+ LLVMContext::MD_noalias,
+ LLVMContext::MD_nontemporal,
+ LLVMContext::MD_mem_parallel_loop_access,
+ LLVMContext::MD_access_group,
+ LLVMContext::MD_nonnull,
+ LLVMContext::MD_align,
+ LLVMContext::MD_dereferenceable,
+ LLVMContext::MD_dereferenceable_or_null,
+ LLVMContext::MD_range};
+ V1->copyMetadata(LI, SafeMDKinds);
+ V2->copyMetadata(LI, SafeMDKinds);
return SelectInst::Create(SI->getCondition(), V1, V2);
}
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index 1aae4ed1eb2b57..513aba12cdbbf5 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -186,12 +186,12 @@ entry:
ret i32 %c
}
-; FIXME: Should preserve none-UB metadata on loads.
+; Preserve none-UB metadata on loads.
define ptr @preserve_load_metadata_after_select_transform1(i1 %c, ptr dereferenceable(8) %a, ptr dereferenceable(8) %b) {
; CHECK-LABEL: @preserve_load_metadata_after_select_transform1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B_VAL:%.*]] = load ptr, ptr [[B:%.*]], align 1
-; CHECK-NEXT: [[A_VAL:%.*]] = load ptr, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[B_VAL:%.*]] = load ptr, ptr [[B:%.*]], align 1, !tbaa [[TBAA0]], !dereferenceable [[META8]], !llvm.access.group [[META6]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load ptr, ptr [[A:%.*]], align 1, !tbaa [[TBAA0]], !dereferenceable [[META8]], !llvm.access.group [[META6]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C:%.*]], ptr [[B_VAL]], ptr [[A_VAL]]
; CHECK-NEXT: ret ptr [[L_SEL]]
;
@@ -201,7 +201,7 @@ entry:
ret ptr %l.sel
}
-; FIXME: Should preserve none-UB metadata on loads.
+; Preserve none-UB metadata on loads.
define i32 @preserve_load_metadata_after_select_transform_range(i1 %c, ptr dereferenceable(8) %a, ptr dereferenceable(8) %b) {
; CHECK-LABEL: @preserve_load_metadata_after_select_transform_range(
; CHECK-NEXT: entry:
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
index 7bb22e2e9f5b54..9f67496f2ef159 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
@@ -4,7 +4,6 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-; FIXME: !llvm.access.group should be preserved, loop should be vectorized.
; End-to-end test for https://github.com/llvm/llvm-project/issues/115595.
define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %face_cell, ptr noalias noundef %x, ptr noalias noundef %y) #0 {
; CHECK-LABEL: define void @test(
@@ -15,16 +14,16 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]]
-; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
-; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[NFACE]], 4
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644
+; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDEX]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDEX]]
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
@@ -35,14 +34,14 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
; CHECK: [[FOR_BODY_PREHEADER14]]:
-; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
>From da0a211351c4fa4aacdd70b1a708e7445bf7f674 Mon Sep 17 00:00:00 2001
From: Florian Hahn <flo at fhahn.com>
Date: Thu, 16 Jan 2025 11:13:20 +0000
Subject: [PATCH 3/3] !fixup use PoisonGeneratingMetadataIDs.
---
.../InstCombineLoadStoreAlloca.cpp | 23 ++++---------------
.../InstCombine/loadstore-metadata.ll | 19 +++++++--------
.../X86/preserve-access-group.ll | 23 ++++++++++---------
3 files changed, 26 insertions(+), 39 deletions(-)
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index cf40c5c02a6bba..f4f98e9addf56f 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -1060,25 +1060,10 @@ Instruction *InstCombinerImpl::visitLoadInst(LoadInst &LI) {
V1->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
V2->setAlignment(Alignment);
V2->setAtomic(LI.getOrdering(), LI.getSyncScopeID());
- // Can copy any metadata that cannot trigger UB. In particular do not
- // copy !noundef or !invariant.load.
- unsigned SafeMDKinds[] = {LLVMContext::MD_dbg,
- LLVMContext::MD_tbaa,
- LLVMContext::MD_prof,
- LLVMContext::MD_fpmath,
- LLVMContext::MD_tbaa_struct,
- LLVMContext::MD_alias_scope,
- LLVMContext::MD_noalias,
- LLVMContext::MD_nontemporal,
- LLVMContext::MD_mem_parallel_loop_access,
- LLVMContext::MD_access_group,
- LLVMContext::MD_nonnull,
- LLVMContext::MD_align,
- LLVMContext::MD_dereferenceable,
- LLVMContext::MD_dereferenceable_or_null,
- LLVMContext::MD_range};
- V1->copyMetadata(LI, SafeMDKinds);
- V2->copyMetadata(LI, SafeMDKinds);
+ // It is safe to copy any metadata that does not trigger UB. Copy any
+ // poison-generating metadata.
+ V1->copyMetadata(LI, Instruction::PoisonGeneratingMetadataIDs);
+ V2->copyMetadata(LI, Instruction::PoisonGeneratingMetadataIDs);
return SelectInst::Create(SI->getCondition(), V1, V2);
}
diff --git a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
index 513aba12cdbbf5..5a977882504cef 100644
--- a/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
+++ b/llvm/test/Transforms/InstCombine/loadstore-metadata.ll
@@ -190,8 +190,8 @@ entry:
define ptr @preserve_load_metadata_after_select_transform1(i1 %c, ptr dereferenceable(8) %a, ptr dereferenceable(8) %b) {
; CHECK-LABEL: @preserve_load_metadata_after_select_transform1(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B_VAL:%.*]] = load ptr, ptr [[B:%.*]], align 1, !tbaa [[TBAA0]], !dereferenceable [[META8]], !llvm.access.group [[META6]]
-; CHECK-NEXT: [[A_VAL:%.*]] = load ptr, ptr [[A:%.*]], align 1, !tbaa [[TBAA0]], !dereferenceable [[META8]], !llvm.access.group [[META6]]
+; CHECK-NEXT: [[B_VAL:%.*]] = load ptr, ptr [[B:%.*]], align 1, !nonnull [[META6]], !align [[META8]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load ptr, ptr [[A:%.*]], align 1, !nonnull [[META6]], !align [[META8]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C:%.*]], ptr [[B_VAL]], ptr [[A_VAL]]
; CHECK-NEXT: ret ptr [[L_SEL]]
;
@@ -205,8 +205,8 @@ entry:
define i32 @preserve_load_metadata_after_select_transform_range(i1 %c, ptr dereferenceable(8) %a, ptr dereferenceable(8) %b) {
; CHECK-LABEL: @preserve_load_metadata_after_select_transform_range(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B:%.*]], align 1
-; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A:%.*]], align 1
+; CHECK-NEXT: [[B_VAL:%.*]] = load i32, ptr [[B:%.*]], align 1, !range [[RNG10:![0-9]+]]
+; CHECK-NEXT: [[A_VAL:%.*]] = load i32, ptr [[A:%.*]], align 1, !range [[RNG10]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[C:%.*]], i32 [[B_VAL]], i32 [[A_VAL]]
; CHECK-NEXT: ret i32 [[L_SEL]]
;
@@ -294,7 +294,7 @@ define double @preserve_load_metadata_after_select_transform_metadata_missing_4(
; CHECK-LABEL: @preserve_load_metadata_after_select_transform_metadata_missing_4(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[L_A:%.*]] = load double, ptr [[A:%.*]], align 8, !tbaa [[TBAA0]], !alias.scope [[META3]], !noalias [[META3]], !llvm.access.group [[META6]]
-; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !alias.scope [[META10:![0-9]+]], !noalias [[META10]], !llvm.access.group [[ACC_GRP13:![0-9]+]]
+; CHECK-NEXT: [[L_B:%.*]] = load double, ptr [[B:%.*]], align 8, !tbaa [[TBAA0]], !alias.scope [[META11:![0-9]+]], !noalias [[META11]], !llvm.access.group [[ACC_GRP14:![0-9]+]]
; CHECK-NEXT: [[CMP_I:%.*]] = fcmp fast olt double [[L_A]], [[L_B]]
; CHECK-NEXT: [[L_SEL:%.*]] = select i1 [[CMP_I]], double [[L_B]], double [[L_A]]
; CHECK-NEXT: ret double [[L_SEL]]
@@ -337,8 +337,9 @@ entry:
; CHECK: [[META7]] = !{i32 1}
; CHECK: [[META8]] = !{i64 8}
; CHECK: [[ACC_GRP9]] = distinct !{}
-; CHECK: [[META10]] = !{[[META11:![0-9]+]]}
-; CHECK: [[META11]] = distinct !{[[META11]], [[META12:![0-9]+]]}
-; CHECK: [[META12]] = distinct !{[[META12]]}
-; CHECK: [[ACC_GRP13]] = distinct !{}
+; CHECK: [[RNG10]] = !{i32 0, i32 42}
+; CHECK: [[META11]] = !{[[META12:![0-9]+]]}
+; CHECK: [[META12]] = distinct !{[[META12]], [[META13:![0-9]+]]}
+; CHECK: [[META13]] = distinct !{[[META13]]}
+; CHECK: [[ACC_GRP14]] = distinct !{}
;.
diff --git a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
index 9f67496f2ef159..7bb22e2e9f5b54 100644
--- a/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
+++ b/llvm/test/Transforms/PhaseOrdering/X86/preserve-access-group.ll
@@ -4,6 +4,7 @@
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; FIXME: !llvm.access.group should be preserved, loop should be vectorized.
; End-to-end test for https://github.com/llvm/llvm-project/issues/115595.
define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %face_cell, ptr noalias noundef %x, ptr noalias noundef %y) #0 {
; CHECK-LABEL: define void @test(
@@ -14,16 +15,16 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
; CHECK: [[FOR_BODY_PREHEADER]]:
; CHECK-NEXT: [[TMP0:%.*]] = zext nneg i32 [[NFACE]] to i64
; CHECK-NEXT: [[INVARIANT_GEP:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[TMP0]]
-; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[NFACE]], 4
-; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NFACE]], 4
+; CHECK-NEXT: br i1 [[TMP1]], label %[[FOR_BODY_PREHEADER14:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
-; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP0]], 2147483644
+; CHECK-NEXT: [[UNROLL_ITER:%.*]] = and i64 [[TMP0]], 2147483644
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDEX]]
-; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDEX]]
+; CHECK-NEXT: [[INDVARS_IV_EPIL:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw i32, ptr [[FACE_CELL]], i64 [[INDVARS_IV_EPIL]]
+; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP10]], align 4, !tbaa [[TBAA0:![0-9]+]], !llvm.access.group [[ACC_GRP4:![0-9]+]]
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw i32, ptr [[INVARIANT_GEP]], i64 [[INDVARS_IV_EPIL]]
; CHECK-NEXT: [[WIDE_LOAD12:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4, !tbaa [[TBAA0]], !llvm.access.group [[ACC_GRP4]]
; CHECK-NEXT: [[TMP3:%.*]] = sext <4 x i32> [[WIDE_LOAD]] to <4 x i64>
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds double, ptr [[Y]], <4 x i64> [[TMP3]]
@@ -34,14 +35,14 @@ define void @test(i32 noundef %nface, i32 noundef %ncell, ptr noalias noundef %f
; CHECK-NEXT: [[TMP7:%.*]] = fcmp fast olt <4 x double> [[WIDE_MASKED_GATHER]], [[WIDE_MASKED_GATHER13]]
; CHECK-NEXT: [[TMP8:%.*]] = select <4 x i1> [[TMP7]], <4 x double> [[WIDE_MASKED_GATHER13]], <4 x double> [[WIDE_MASKED_GATHER]]
; CHECK-NEXT: tail call void @llvm.masked.scatter.v4f64.v4p0(<4 x double> [[TMP8]], <4 x ptr> [[TMP4]], i32 8, <4 x i1> splat (i1 true)), !tbaa [[TBAA5]], !llvm.access.group [[ACC_GRP4]]
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDVARS_IV_EPIL]], 4
+; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[UNROLL_ITER]]
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[TMP0]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[UNROLL_ITER]], [[TMP0]]
; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY_PREHEADER14]]
; CHECK: [[FOR_BODY_PREHEADER14]]:
-; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[N_VEC]], %[[MIDDLE_BLOCK]] ]
+; CHECK-NEXT: [[INDVARS_IV_PH:%.*]] = phi i64 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[UNROLL_ITER]], %[[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
More information about the llvm-commits
mailing list