[llvm] 0c028bb - [LV] Always add uniform pointers to uniforms list.

Thu Sep 18 14:56:46 PDT 2025

Author: Florian Hahn
Date: 2025-09-18T22:56:19+01:00
New Revision: 0c028bbf33d96045871f254fa1810f9767292506

URL: https://github.com/llvm/llvm-project/commit/0c028bbf33d96045871f254fa1810f9767292506
DIFF: https://github.com/llvm/llvm-project/commit/0c028bbf33d96045871f254fa1810f9767292506.diff

LOG: [LV] Always add uniform pointers to uniforms list.

Always add pointers proven to be uniform via Legal/SCEV to the worklist.
This extends the existing logic to handle a few more pointers known to
be uniform.

Added: 
    

Modified: 
    llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
    llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 2efd3564ab898..a90ebb679f7af 100644

--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3177,6 +3177,11 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
       if (!Ptr)
         continue;
 
+      // If the pointer can be proven to be uniform, always add it to the
+      // worklist.
+      if (isa<Instruction>(Ptr) && Legal->isUniform(Ptr, VF))
+        AddToWorklistIfAllowed(cast<Instruction>(Ptr));
+
       if (IsUniformMemOpUse(&I))
         AddToWorklistIfAllowed(&I);
 

diff  --git a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
index 10975a84c680f..cab6179131e56 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/replicating-load-store-costs.ll
@@ -152,34 +152,18 @@ define void @uniform_gep_for_replicating_gep(ptr %dst) {
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], splat (i32 2)
-; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
-; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
-; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 3
-; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <2 x i32> [[VEC_IND]], zeroinitializer
 ; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq <2 x i32> [[STEP_ADD]], zeroinitializer
-; CHECK-NEXT:    [[TMP6:%.*]] = lshr i32 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP7:%.*]] = lshr i32 [[TMP1]], 1
-; CHECK-NEXT:    [[TMP8:%.*]] = lshr i32 [[TMP2]], 1
-; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP3]], 1
-; CHECK-NEXT:    [[TMP10:%.*]] = zext <2 x i1> [[TMP4]] to <2 x i8>
+; CHECK-NEXT:    [[TMP8:%.*]] = lshr i32 [[INDEX]], 1
+; CHECK-NEXT:    [[TMP9:%.*]] = lshr i32 [[TMP2]], 1
 ; CHECK-NEXT:    [[TMP11:%.*]] = zext <2 x i1> [[TMP5]] to <2 x i8>
-; CHECK-NEXT:    [[TMP12:%.*]] = zext i32 [[TMP6]] to i64
-; CHECK-NEXT:    [[TMP13:%.*]] = zext i32 [[TMP7]] to i64
 ; CHECK-NEXT:    [[TMP14:%.*]] = zext i32 [[TMP8]] to i64
 ; CHECK-NEXT:    [[TMP15:%.*]] = zext i32 [[TMP9]] to i64
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP13]]
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP14]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = getelementptr i64, ptr [[DST]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP20:%.*]] = extractelement <2 x i8> [[TMP10]], i32 0
-; CHECK-NEXT:    store i8 [[TMP20]], ptr [[TMP16]], align 1
-; CHECK-NEXT:    [[TMP21:%.*]] = extractelement <2 x i8> [[TMP10]], i32 1
-; CHECK-NEXT:    store i8 [[TMP21]], ptr [[TMP17]], align 1
-; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i8> [[TMP11]], i32 0
+; CHECK-NEXT:    [[TMP22:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1
 ; CHECK-NEXT:    store i8 [[TMP22]], ptr [[TMP18]], align 1
-; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <2 x i8> [[TMP11]], i32 1
-; CHECK-NEXT:    store i8 [[TMP23]], ptr [[TMP19]], align 1
+; CHECK-NEXT:    store i8 [[TMP22]], ptr [[TMP19]], align 1
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
 ; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], splat (i32 2)
 ; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], 128