[llvm] [LV] Fix for miscompile with disjoint or (PR #81922)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 15 13:27:08 PST 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: None (annamthomas)
<details>
<summary>Changes</summary>
- Precommit test for issue in pr81872
- [LoopVectorize] Fix for miscompile with disjoint or
---
Full diff: https://github.com/llvm/llvm-project/pull/81922.diff
2 Files Affected:
- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+11)
- (added) llvm/test/Transforms/LoopVectorize/X86/pr81872.ll (+102)
``````````diff
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 37a356c43e29a4..f2744b5d66a26f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -888,6 +888,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
return false;
} // end of PHI handling
+ // An or-disjoint instruction which is predicated may be disjoint only
+ // because of a dominating condition. Avoid vectorizing in such cases, since
+ // dropping the disjoint flag (to avoid poison) changes the meaning of the
+ // instruction, which was originally an add.
+ // TODO: We should check the terminating condition of all
+ // dominating blocks in the loop to see if Op0 is used for their
+ // terminating condition.
+ if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I))
+ if (Op->isDisjoint() && blockNeedsPredication(BB))
+ return false;
+
// We handle calls that:
// * Are debug info intrinsics.
// * Have a mapping to an IR intrinsic.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
new file mode 100644
index 00000000000000..20a6732e340b78
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@global = external global ptr addrspace(1), align 8
+
+; PR 81872 explains the issue.
+
+; If we vectorize, we have a miscompile where the array IV, and thereby the value stored in (arr[99],
+; arr[98]), is calculated incorrectly, since the disjoint or was only disjoint because
+; of dominating conditions. Dropping the disjoint flag to avoid poison still changes
+; the behaviour, since now the or is no longer equivalent to the add.
+; Function Attrs: uwtable
+define void @test(ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) %arg1) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: bb5:
+; CHECK-NEXT: [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 16
+; CHECK-NEXT: br label [[BB8:%.*]]
+; CHECK: bb6:
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[PHI9:%.*]], 1
+; CHECK-NEXT: [[ICMP7:%.*]] = icmp sgt i32 [[PHI9]], -2
+; CHECK-NEXT: br i1 [[ICMP7]], label [[BB10:%.*]], label [[BB8]]
+; CHECK: bb8:
+; CHECK-NEXT: [[PHI9]] = phi i32 [ -60516, [[BB5:%.*]] ], [ [[ADD]], [[BB6:%.*]] ]
+; CHECK-NEXT: br label [[BB15:%.*]]
+; CHECK: bb10:
+; CHECK-NEXT: [[GETELEMENTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 96
+; CHECK-NEXT: [[LOAD12:%.*]] = load i64, ptr addrspace(1) [[GETELEMENTPTR11]], align 8, !noundef [[META0:![0-9]+]]
+; CHECK-NEXT: [[LOAD13:%.*]] = load ptr addrspace(1), ptr @global, align 8, !invariant.load [[META0]], !nonnull [[META0]], !dereferenceable_or_null !1, !align [[META2:![0-9]+]]
+; CHECK-NEXT: [[GETELEMENTPTR14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[LOAD13]], i64 848
+; CHECK-NEXT: store i64 [[LOAD12]], ptr addrspace(1) [[GETELEMENTPTR14]], align 8
+; CHECK-NEXT: ret void
+; CHECK: bb15:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 99, [[BB8]] ], [ [[IV_NEXT:%.*]], [[BB20:%.*]] ]
+; CHECK-NEXT: [[AND:%.*]] = and i64 [[IV]], 1
+; CHECK-NEXT: [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0
+; CHECK-NEXT: br i1 [[ICMP17]], label [[BB18:%.*]], label [[BB20]], !prof [[PROF3:![0-9]+]]
+; CHECK: bb18:
+; CHECK-NEXT: [[OR:%.*]] = or disjoint i64 [[IV]], 1
+; CHECK-NEXT: [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[GETELEMENTPTR]], i64 [[OR]]
+; CHECK-NEXT: store i64 1, ptr addrspace(1) [[GETELEMENTPTR19]], align 8
+; CHECK-NEXT: br label [[BB20]]
+; CHECK: bb20:
+; CHECK-NEXT: [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; CHECK-NEXT: [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90
+; CHECK-NEXT: br i1 [[ICMP22]], label [[BB6]], label [[BB15]], !prof [[PROF4:![0-9]+]]
+;
+bb5:
+ %getelementptr = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 16
+ br label %bb8
+
+bb6: ; preds = %bb20
+ %add = add nsw i32 %phi9, 1
+ %icmp7 = icmp sgt i32 %phi9, -2
+ br i1 %icmp7, label %bb10, label %bb8
+
+bb8: ; preds = %bb6, %bb5
+ %phi9 = phi i32 [ -60516, %bb5 ], [ %add, %bb6 ]
+ br label %bb15
+
+bb10: ; preds = %bb6
+ %getelementptr11 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 96
+ %load12 = load i64, ptr addrspace(1) %getelementptr11, align 8, !noundef !4
+ %load13 = load ptr addrspace(1), ptr @global, align 8, !invariant.load !4, !nonnull !4, !dereferenceable_or_null !16, !align !17
+ %getelementptr14 = getelementptr inbounds i8, ptr addrspace(1) %load13, i64 848
+ store i64 %load12, ptr addrspace(1) %getelementptr14, align 8
+ ret void
+
+bb15: ; preds = %bb20, %bb8
+ %iv = phi i64 [ 99, %bb8 ], [ %iv.next, %bb20 ]
+ %and = and i64 %iv, 1
+ %icmp17 = icmp eq i64 %and, 0
+ br i1 %icmp17, label %bb18, label %bb20, !prof !21
+
+bb18: ; preds = %bb15
+ %or = or disjoint i64 %iv, 1
+ %getelementptr19 = getelementptr inbounds i64, ptr addrspace(1) %getelementptr, i64 %or
+ store i64 1, ptr addrspace(1) %getelementptr19, align 8
+ br label %bb20
+
+bb20: ; preds = %bb18, %bb15
+ %iv.next = add nsw i64 %iv, -1
+ %icmp22 = icmp eq i64 %iv.next, 90
+ br i1 %icmp22, label %bb6, label %bb15, !prof !22
+}
+
+attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" }
+
+!4 = !{}
+!10 = !{i32 1}
+!16 = !{i64 864}
+!17 = !{i64 8}
+!21 = !{!"branch_weights", i32 1, i32 1}
+!22 = !{!"branch_weights", i32 1, i32 95}
+;.
+; CHECK: [[META0]] = !{}
+; CHECK: [[META2]] = !{i64 8}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 95}
+;.
``````````
</details>
https://github.com/llvm/llvm-project/pull/81922
More information about the llvm-commits
mailing list