[llvm] 055e4ff - [VPlan] Don't narrow op multiple times in narrowInterleaveGroups.
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 10 11:23:11 PDT 2025
Author: Florian Hahn
Date: 2025-09-10T19:22:42+01:00
New Revision: 055e4ff35ab0789494ff9a3d41e3182a3aca8d0a
URL: https://github.com/llvm/llvm-project/commit/055e4ff35ab0789494ff9a3d41e3182a3aca8d0a
DIFF: https://github.com/llvm/llvm-project/commit/055e4ff35ab0789494ff9a3d41e3182a3aca8d0a.diff
LOG: [VPlan] Don't narrow op multiple times in narrowInterleaveGroups.
Track which ops already have been narrowed, to avoid narrowing the same
operation multiple times. Repeated narrowing will lead to incorrect
results, because we could first narrow from an interleave group -> wide
load, and then narrow the wide load -> single-scalar load.
Fixes https://github.com/llvm/llvm-project/issues/156190.
Added:
Modified:
llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 6b122e19c0169..ecb0f093e772d 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -4017,9 +4017,10 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
return;
// Convert InterleaveGroup \p R to a single VPWidenLoadRecipe.
- auto NarrowOp = [](VPValue *V) -> VPValue * {
+ SmallPtrSet<VPValue *, 4> NarrowedOps;
+ auto NarrowOp = [&NarrowedOps](VPValue *V) -> VPValue * {
auto *R = V->getDefiningRecipe();
- if (!R)
+ if (!R || NarrowedOps.contains(V))
return V;
if (auto *LoadGroup = dyn_cast<VPInterleaveRecipe>(R)) {
// Narrow interleave group to wide load, as transformed VPlan will only
@@ -4029,6 +4030,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
LoadGroup->getAddr(), LoadGroup->getMask(), /*Consecutive=*/true,
/*Reverse=*/false, {}, LoadGroup->getDebugLoc());
L->insertBefore(LoadGroup);
+ NarrowedOps.insert(L);
return L;
}
@@ -4036,6 +4038,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
assert(RepR->isSingleScalar() &&
isa<LoadInst>(RepR->getUnderlyingInstr()) &&
"must be a single scalar load");
+ NarrowedOps.insert(RepR);
return RepR;
}
auto *WideLoad = cast<VPWidenLoadRecipe>(R);
@@ -4049,6 +4052,7 @@ void VPlanTransforms::narrowInterleaveGroups(VPlan &Plan, ElementCount VF,
/*IsUniform*/ true,
/*Mask*/ nullptr, *WideLoad);
N->insertBefore(WideLoad);
+ NarrowedOps.insert(N);
return N;
};
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
index 687222942dc1b..ade929c791a47 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/transform-narrow-interleave-to-widen-memory-with-wide-ops.ll
@@ -1169,7 +1169,7 @@ exit:
ret void
}
-; FIXME: Currently incorrectly narrows the load,
+; Make sure multiple uses of a narrowed op are handled correctly,
; https://github.com/llvm/llvm-project/issues/156190.
define void @multiple_store_groups_storing_same_wide_bin_op(ptr noalias %A, ptr noalias %B, ptr noalias %C) {
; VF2-LABEL: define void @multiple_store_groups_storing_same_wide_bin_op(
@@ -1181,9 +1181,7 @@ define void @multiple_store_groups_storing_same_wide_bin_op(ptr noalias %A, ptr
; VF2: [[VECTOR_BODY]]:
; VF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; VF2-NEXT: [[TMP0:%.*]] = getelementptr { double, double }, ptr [[A]], i64 [[INDEX]]
-; VF2-NEXT: [[TMP1:%.*]] = load double, ptr [[TMP0]], align 8
-; VF2-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP1]], i64 0
-; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
+; VF2-NEXT: [[BROADCAST_SPLAT:%.*]] = load <2 x double>, ptr [[TMP0]], align 8
; VF2-NEXT: [[TMP2:%.*]] = fadd contract <2 x double> [[BROADCAST_SPLAT]], splat (double 2.000000e+01)
; VF2-NEXT: [[TMP3:%.*]] = getelementptr { double, double }, ptr [[B]], i64 [[INDEX]]
; VF2-NEXT: store <2 x double> [[TMP2]], ptr [[TMP3]], align 8
More information about the llvm-commits
mailing list