[llvm] [LV] Increase coverage of uniformity-rewriter (PR #161219)

Ramkumar Ramachandra via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 29 08:49:47 PDT 2025


artagnon wrote:

The following diff, applied on top, seems to be correct:

```diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index ff35db14f709..4e917adb3f29 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -528,14 +528,6 @@ public:
     return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
   }

-  const SCEV *visitUnknown(const SCEVUnknown *S) {
-    if (SE.isLoopInvariant(S, TheLoop))
-      return S;
-    // The value could vary across iterations.
-    CannotAnalyze = true;
-    return S;
-  }
-
   const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
     // Could not analyze the expression.
     CannotAnalyze = true;
@@ -545,14 +537,6 @@ public:
   static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                              unsigned StepMultiplier, unsigned Offset,
                              Loop *TheLoop) {
-    /// Bail out if the expression does not contain an UDiv expression.
-    /// Uniform values which are not loop invariant require operations to strip
-    /// out the lowest bits. For now just look for UDivs and use it to avoid
-    /// re-writing UDIV-free expressions for other lanes to limit compile time.
-    if (!SCEVExprContains(S,
-                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
-      return SE.getCouldNotCompute();
-
     SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                              TheLoop);
     const SCEV *Result = Rewriter.visit(S);
diff --git a/llvm/test/Transforms/LoopVectorize/uniformity-rewriter.ll b/llvm/test/Transforms/LoopVectorize/uniformity-rewriter.ll
index 8f65b07c0797..8682ad48340e 100644
--- a/llvm/test/Transforms/LoopVectorize/uniformity-rewriter.ll
+++ b/llvm/test/Transforms/LoopVectorize/uniformity-rewriter.ll
@@ -12,23 +12,18 @@ define i32 @uniformityrew(ptr %src, i32 %x, i1 %c, i64 %n) {
 ; CHECK:       [[VECTOR_PH]]:
 ; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
 ; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP0:%.*]] = ashr <4 x i32> [[BROADCAST_SPLAT]], splat (i32 1)
-; CHECK-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[TMP0]], splat (i32 1)
-; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i32> [[TMP6]], splat (i32 1)
-; CHECK-NEXT:    [[TMP8:%.*]] = zext <4 x i32> [[TMP7]] to <4 x i64>
-; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, ptr [[SRC]], <4 x i64> [[TMP8]]
+; CHECK-NEXT:    [[TMP0:%.*]] = ashr i32 [[X]], 1
+; CHECK-NEXT:    [[TMP1:%.*]] = and i32 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP2:%.*]] = xor i32 [[TMP1]], 1
+; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[TMP3]]
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
 ; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr i32, ptr [[SRC]], <4 x i64> [[VEC_IND]]
-; CHECK-NEXT:    [[TMP11:%.*]] = select i1 [[C]], <4 x ptr> [[TMP9]], <4 x ptr> [[TMP10]]
-; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x ptr> [[TMP11]], i32 3
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[SRC]], i64 [[INDEX]]
+; CHECK-NEXT:    [[TMP12:%.*]] = select i1 [[C]], ptr [[TMP4]], ptr [[TMP5]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = load i32, ptr [[TMP12]], align 4
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
 ; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
 ; CHECK-NEXT:    br i1 [[TMP14]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
```

https://github.com/llvm/llvm-project/pull/161219


More information about the llvm-commits mailing list