[llvm] 19e6d54 - [LV] Re-use existing compare if possible for diff checks.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Thu Nov 23 03:36:14 PST 2023
Author: Florian Hahn
Date: 2023-11-23T11:35:21Z
New Revision: 19e6d541889f24e21c7a9a6e021aeb82efd4dcb2
URL: https://github.com/llvm/llvm-project/commit/19e6d541889f24e21c7a9a6e021aeb82efd4dcb2
DIFF: https://github.com/llvm/llvm-project/commit/19e6d541889f24e21c7a9a6e021aeb82efd4dcb2.diff
LOG: [LV] Re-use existing compare if possible for diff checks.
SCEV simplifying the subtraction may result in redundant compares that
are all OR'd together. Keep track of the generated operands in
SeenCompares, with the key being the pair of operands for the compare.
If we already generated the same compare previously, skip it.
Added:
Modified:
llvm/lib/Transforms/Utils/LoopUtils.cpp
llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 85e28ea82fb4710..acce8d10cef8246 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1787,17 +1787,28 @@ Value *llvm::addDiffRuntimeChecks(
// Our instructions might fold to a constant.
Value *MemoryRuntimeCheck = nullptr;
+ auto &SE = *Expander.getSE();
+ // Map to keep track of created compares. The key is the pair of operands for
+ // the compare, to allow detecting and re-using redundant compares.
+ DenseMap<std::pair<Value *, Value *>, Value *> SeenCompares;
for (const auto &C : Checks) {
Type *Ty = C.SinkStart->getType();
// Compute VF * IC * AccessSize.
auto *VFTimesUFTimesSize =
ChkBuilder.CreateMul(GetVF(ChkBuilder, Ty->getScalarSizeInBits()),
ConstantInt::get(Ty, IC * C.AccessSize));
- auto &SE = *Expander.getSE();
Value *Diff = Expander.expandCodeFor(
SE.getMinusSCEV(C.SinkStart, C.SrcStart), Ty, Loc);
- Value *IsConflict =
+
+ // Check if the same compare has already been created earlier. In that case,
+ // there is no need to check it again.
+ Value *IsConflict = SeenCompares.lookup({Diff, VFTimesUFTimesSize});
+ if (IsConflict)
+ continue;
+
+ IsConflict =
ChkBuilder.CreateICmpULT(Diff, VFTimesUFTimesSize, "diff.check");
+ SeenCompares.insert({{Diff, VFTimesUFTimesSize}, IsConflict});
if (C.NeedsFreeze)
IsConflict =
ChkBuilder.CreateFreeze(IsConflict, IsConflict->getName() + ".fr");
diff --git a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll
index 6bc4e6a8662e019..cdbeff5a9d6f675 100644
--- a/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll
+++ b/llvm/test/Transforms/LoopVectorize/runtime-checks-difference-simplifications.ll
@@ -51,89 +51,7 @@ define void @test_large_number_of_group(ptr %dst, i64 %off, i64 %N) {
; CHECK-NEXT: [[TMP9:%.*]] = mul i64 [[OFF]], 88
; CHECK-NEXT: [[DIFF_CHECK18:%.*]] = icmp ult i64 [[TMP9]], 32
; CHECK-NEXT: [[CONFLICT_RDX19:%.*]] = or i1 [[CONFLICT_RDX17]], [[DIFF_CHECK18]]
-; CHECK-NEXT: [[DIFF_CHECK20:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[CONFLICT_RDX21:%.*]] = or i1 [[CONFLICT_RDX19]], [[DIFF_CHECK20]]
-; CHECK-NEXT: [[DIFF_CHECK22:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[CONFLICT_RDX23:%.*]] = or i1 [[CONFLICT_RDX21]], [[DIFF_CHECK22]]
-; CHECK-NEXT: [[DIFF_CHECK24:%.*]] = icmp ult i64 [[TMP1]], 32
-; CHECK-NEXT: [[CONFLICT_RDX25:%.*]] = or i1 [[CONFLICT_RDX23]], [[DIFF_CHECK24]]
-; CHECK-NEXT: [[DIFF_CHECK26:%.*]] = icmp ult i64 [[TMP2]], 32
-; CHECK-NEXT: [[CONFLICT_RDX27:%.*]] = or i1 [[CONFLICT_RDX25]], [[DIFF_CHECK26]]
-; CHECK-NEXT: [[DIFF_CHECK28:%.*]] = icmp ult i64 [[TMP3]], 32
-; CHECK-NEXT: [[CONFLICT_RDX29:%.*]] = or i1 [[CONFLICT_RDX27]], [[DIFF_CHECK28]]
-; CHECK-NEXT: [[DIFF_CHECK30:%.*]] = icmp ult i64 [[TMP4]], 32
-; CHECK-NEXT: [[CONFLICT_RDX31:%.*]] = or i1 [[CONFLICT_RDX29]], [[DIFF_CHECK30]]
-; CHECK-NEXT: [[DIFF_CHECK32:%.*]] = icmp ult i64 [[TMP5]], 32
-; CHECK-NEXT: [[CONFLICT_RDX33:%.*]] = or i1 [[CONFLICT_RDX31]], [[DIFF_CHECK32]]
-; CHECK-NEXT: [[DIFF_CHECK34:%.*]] = icmp ult i64 [[TMP6]], 32
-; CHECK-NEXT: [[CONFLICT_RDX35:%.*]] = or i1 [[CONFLICT_RDX33]], [[DIFF_CHECK34]]
-; CHECK-NEXT: [[DIFF_CHECK36:%.*]] = icmp ult i64 [[TMP7]], 32
-; CHECK-NEXT: [[CONFLICT_RDX37:%.*]] = or i1 [[CONFLICT_RDX35]], [[DIFF_CHECK36]]
-; CHECK-NEXT: [[DIFF_CHECK38:%.*]] = icmp ult i64 [[TMP8]], 32
-; CHECK-NEXT: [[CONFLICT_RDX39:%.*]] = or i1 [[CONFLICT_RDX37]], [[DIFF_CHECK38]]
-; CHECK-NEXT: [[DIFF_CHECK40:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[CONFLICT_RDX41:%.*]] = or i1 [[CONFLICT_RDX39]], [[DIFF_CHECK40]]
-; CHECK-NEXT: [[DIFF_CHECK42:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[CONFLICT_RDX43:%.*]] = or i1 [[CONFLICT_RDX41]], [[DIFF_CHECK42]]
-; CHECK-NEXT: [[DIFF_CHECK44:%.*]] = icmp ult i64 [[TMP1]], 32
-; CHECK-NEXT: [[CONFLICT_RDX45:%.*]] = or i1 [[CONFLICT_RDX43]], [[DIFF_CHECK44]]
-; CHECK-NEXT: [[DIFF_CHECK46:%.*]] = icmp ult i64 [[TMP2]], 32
-; CHECK-NEXT: [[CONFLICT_RDX47:%.*]] = or i1 [[CONFLICT_RDX45]], [[DIFF_CHECK46]]
-; CHECK-NEXT: [[DIFF_CHECK48:%.*]] = icmp ult i64 [[TMP3]], 32
-; CHECK-NEXT: [[CONFLICT_RDX49:%.*]] = or i1 [[CONFLICT_RDX47]], [[DIFF_CHECK48]]
-; CHECK-NEXT: [[DIFF_CHECK50:%.*]] = icmp ult i64 [[TMP4]], 32
-; CHECK-NEXT: [[CONFLICT_RDX51:%.*]] = or i1 [[CONFLICT_RDX49]], [[DIFF_CHECK50]]
-; CHECK-NEXT: [[DIFF_CHECK52:%.*]] = icmp ult i64 [[TMP5]], 32
-; CHECK-NEXT: [[CONFLICT_RDX53:%.*]] = or i1 [[CONFLICT_RDX51]], [[DIFF_CHECK52]]
-; CHECK-NEXT: [[DIFF_CHECK54:%.*]] = icmp ult i64 [[TMP6]], 32
-; CHECK-NEXT: [[CONFLICT_RDX55:%.*]] = or i1 [[CONFLICT_RDX53]], [[DIFF_CHECK54]]
-; CHECK-NEXT: [[DIFF_CHECK56:%.*]] = icmp ult i64 [[TMP7]], 32
-; CHECK-NEXT: [[CONFLICT_RDX57:%.*]] = or i1 [[CONFLICT_RDX55]], [[DIFF_CHECK56]]
-; CHECK-NEXT: [[DIFF_CHECK58:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[CONFLICT_RDX59:%.*]] = or i1 [[CONFLICT_RDX57]], [[DIFF_CHECK58]]
-; CHECK-NEXT: [[DIFF_CHECK60:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[CONFLICT_RDX61:%.*]] = or i1 [[CONFLICT_RDX59]], [[DIFF_CHECK60]]
-; CHECK-NEXT: [[DIFF_CHECK62:%.*]] = icmp ult i64 [[TMP1]], 32
-; CHECK-NEXT: [[CONFLICT_RDX63:%.*]] = or i1 [[CONFLICT_RDX61]], [[DIFF_CHECK62]]
-; CHECK-NEXT: [[DIFF_CHECK64:%.*]] = icmp ult i64 [[TMP2]], 32
-; CHECK-NEXT: [[CONFLICT_RDX65:%.*]] = or i1 [[CONFLICT_RDX63]], [[DIFF_CHECK64]]
-; CHECK-NEXT: [[DIFF_CHECK66:%.*]] = icmp ult i64 [[TMP3]], 32
-; CHECK-NEXT: [[CONFLICT_RDX67:%.*]] = or i1 [[CONFLICT_RDX65]], [[DIFF_CHECK66]]
-; CHECK-NEXT: [[DIFF_CHECK68:%.*]] = icmp ult i64 [[TMP4]], 32
-; CHECK-NEXT: [[CONFLICT_RDX69:%.*]] = or i1 [[CONFLICT_RDX67]], [[DIFF_CHECK68]]
-; CHECK-NEXT: [[DIFF_CHECK70:%.*]] = icmp ult i64 [[TMP5]], 32
-; CHECK-NEXT: [[CONFLICT_RDX71:%.*]] = or i1 [[CONFLICT_RDX69]], [[DIFF_CHECK70]]
-; CHECK-NEXT: [[DIFF_CHECK72:%.*]] = icmp ult i64 [[TMP6]], 32
-; CHECK-NEXT: [[CONFLICT_RDX73:%.*]] = or i1 [[CONFLICT_RDX71]], [[DIFF_CHECK72]]
-; CHECK-NEXT: [[DIFF_CHECK74:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[DIFF_CHECK75:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[DIFF_CHECK76:%.*]] = icmp ult i64 [[TMP1]], 32
-; CHECK-NEXT: [[DIFF_CHECK77:%.*]] = icmp ult i64 [[TMP2]], 32
-; CHECK-NEXT: [[DIFF_CHECK78:%.*]] = icmp ult i64 [[TMP3]], 32
-; CHECK-NEXT: [[DIFF_CHECK79:%.*]] = icmp ult i64 [[TMP4]], 32
-; CHECK-NEXT: [[DIFF_CHECK80:%.*]] = icmp ult i64 [[TMP5]], 32
-; CHECK-NEXT: [[DIFF_CHECK81:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[DIFF_CHECK82:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[DIFF_CHECK83:%.*]] = icmp ult i64 [[TMP1]], 32
-; CHECK-NEXT: [[DIFF_CHECK84:%.*]] = icmp ult i64 [[TMP2]], 32
-; CHECK-NEXT: [[DIFF_CHECK85:%.*]] = icmp ult i64 [[TMP3]], 32
-; CHECK-NEXT: [[DIFF_CHECK86:%.*]] = icmp ult i64 [[TMP4]], 32
-; CHECK-NEXT: [[DIFF_CHECK87:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[DIFF_CHECK88:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[DIFF_CHECK89:%.*]] = icmp ult i64 [[TMP1]], 32
-; CHECK-NEXT: [[DIFF_CHECK90:%.*]] = icmp ult i64 [[TMP2]], 32
-; CHECK-NEXT: [[DIFF_CHECK91:%.*]] = icmp ult i64 [[TMP3]], 32
-; CHECK-NEXT: [[DIFF_CHECK92:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[DIFF_CHECK93:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[DIFF_CHECK94:%.*]] = icmp ult i64 [[TMP1]], 32
-; CHECK-NEXT: [[DIFF_CHECK95:%.*]] = icmp ult i64 [[TMP2]], 32
-; CHECK-NEXT: [[DIFF_CHECK96:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[DIFF_CHECK97:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[DIFF_CHECK98:%.*]] = icmp ult i64 [[TMP1]], 32
-; CHECK-NEXT: [[DIFF_CHECK99:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: [[DIFF_CHECK100:%.*]] = icmp ult i64 [[TMP0]], 32
-; CHECK-NEXT: [[DIFF_CHECK101:%.*]] = icmp ult i64 [[OFF_MUL_8]], 32
-; CHECK-NEXT: br i1 [[CONFLICT_RDX73]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
+; CHECK-NEXT: br i1 [[CONFLICT_RDX19]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
More information about the llvm-commits
mailing list