[llvm] [LV] Check isPredInst instead of isScalarWithPred in uniform analysis. (PR #98892)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 15 05:53:37 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Florian Hahn (fhahn)
<details>
<summary>Changes</summary>
Any instruction marked as uniform will result in a uniform VPReplicateRecipe. If it requires predication, it will be placed in a replicate region, even if isScalarWithPredication returns false.
Check isPredicatedInst instead of isScalarWithPredication to avoid generating uniform VPReplicateRecipes placed inside a replicate region. This fixes an assertion when using scalable VFs.
Fixes https://github.com/llvm/llvm-project/issues/80416. Fixes https://github.com/llvm/llvm-project/issues/94328.
---
Full diff: https://github.com/llvm/llvm-project/pull/98892.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorize.cpp (+5-4)
- (added) llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll (+92)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
index 5520baef7152d..296a346fa7ae1 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@@ -3907,7 +3907,7 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
SetVector<Instruction *> Worklist;
// Add uniform instructions demanding lane 0 to the worklist. Instructions
- // that are scalar with predication must not be considered uniform after
+ // that are require predication must not be considered uniform after
// vectorization, because that would create an erroneous replicating region
// where only a single instance out of VF should be formed.
// TODO: optimize such seldom cases if found important, see PR40816.
@@ -3917,9 +3917,10 @@ void LoopVectorizationCostModel::collectLoopUniforms(ElementCount VF) {
<< *I << "\n");
return;
}
- if (isScalarWithPredication(I, VF)) {
- LLVM_DEBUG(dbgs() << "LV: Found not uniform being ScalarWithPredication: "
- << *I << "\n");
+ if (isPredicatedInst(I)) {
+ LLVM_DEBUG(
+ dbgs() << "LV: Found not uniform due to requiring predication: " << *I
+ << "\n");
return;
}
LLVM_DEBUG(dbgs() << "LV: Found uniform instruction: " << *I << "\n");
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
new file mode 100644
index 0000000000000..6fd74d1416873
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll
@@ -0,0 +1,92 @@
+; RUN: opt -p loop-vectorize -mtriple aarch64 -mcpu=neoverse-v1 -S %s | FileCheck %s
+
+; Test case for https://github.com/llvm/llvm-project/issues/94328.
+define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
+entry:
+ %conv61 = zext i32 %x to i64
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %div18 = sdiv i64 %M, %conv6
+ %conv20 = trunc i64 %div18 to i32
+ %mul30 = mul i64 %div18, %conv61
+ %sub31 = sub i64 %iv, %mul30
+ %conv34 = trunc i64 %sub31 to i32
+ %mul35 = mul i32 %x, %conv20
+ %add36 = add i32 %mul35, %conv34
+ %idxprom = sext i32 %add36 to i64
+ %gep = getelementptr double, ptr %dst, i64 %idxprom
+ store double 0.000000e+00, ptr %gep, align 8
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %N
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+define void @sdiv_feeding_gep_predicated(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
+entry:
+ %conv61 = zext i32 %x to i64
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
+ %c = icmp ule i64 %iv, %M
+ br i1 %c, label %then, label %loop.latch
+
+then:
+ %div18 = sdiv i64 %M, %conv6
+ %conv20 = trunc i64 %div18 to i32
+ %mul30 = mul i64 %div18, %conv61
+ %sub31 = sub i64 %iv, %mul30
+ %conv34 = trunc i64 %sub31 to i32
+ %mul35 = mul i32 %x, %conv20
+ %add36 = add i32 %mul35, %conv34
+ %idxprom = sext i32 %add36 to i64
+ %gep = getelementptr double, ptr %dst, i64 %idxprom
+ store double 0.000000e+00, ptr %gep, align 8
+ br label %loop.latch
+
+loop.latch:
+ %iv.next = add i64 %iv, 1
+ %ec = icmp eq i64 %iv.next, %N
+ br i1 %ec, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; Test case for https://github.com/llvm/llvm-project/issues/80416.
+define void @udiv_urem_feeding_gep(i64 %x, ptr %dst, i64 %N) {
+entry:
+ %mul.1.i = mul i64 %x, %x
+ %mul.2.i = mul i64 %mul.1.i, %x
+ br label %loop
+
+loop:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
+ %div.i = udiv i64 %iv, %mul.2.i
+ %rem.i = urem i64 %iv, %mul.2.i
+ %div.1.i = udiv i64 %rem.i, %mul.1.i
+ %rem.1.i = urem i64 %rem.i, %mul.1.i
+ %div.2.i = udiv i64 %rem.1.i, %x
+ %rem.2.i = urem i64 %rem.1.i, %x
+ %mul.i = mul i64 %x, %div.i
+ %add.i = add i64 %mul.i, %div.1.i
+ %mul.1.i9 = mul i64 %add.i, %x
+ %add.1.i = add i64 %mul.1.i9, %div.2.i
+ %mul.2.i11 = mul i64 %add.1.i, %x
+ %add.2.i = add i64 %mul.2.i11, %rem.2.i
+ %sext.i = shl i64 %add.2.i, 32
+ %conv6.i = ashr i64 %sext.i, 32
+ %gep = getelementptr i64, ptr %dst, i64 %conv6.i
+ store i64 %div.i, ptr %gep, align 4
+ %iv.next = add i64 %iv, 1
+ %exitcond.not = icmp eq i64 %iv, %N
+ br i1 %exitcond.not, label %exit, label %loop
+
+exit:
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/98892
More information about the llvm-commits
mailing list