[llvm] [LV] Fix for miscompile with disjoint or (PR #81922)

Thu Feb 15 13:27:08 PST 2024

llvmbot wrote:




@llvm/pr-subscribers-llvm-transforms

Author: None (annamthomas)

<details>
<summary>Changes</summary>

- Precommit test for issue in pr81872
- [LoopVectorize] Fix for miscompile with disjoint or


---
Full diff: https://github.com/llvm/llvm-project/pull/81922.diff


2 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (+11) 
- (added) llvm/test/Transforms/LoopVectorize/X86/pr81872.ll (+102) 


``````````diff

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 37a356c43e29a4..f2744b5d66a26f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -888,6 +888,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         return false;
       } // end of PHI handling
 
+      // An or-disjoint instruction in a predicated block may be disjoint only
+      // because of a dominating condition. Avoid vectorizing in such cases:
+      // dropping the disjoint flag (to avoid poison) changes the meaning of the
+      // instruction, which was originally an add.
+      // TODO: We should check the terminating condition of all
+      // dominating blocks in the loop to see if Op0 is used for their
+      // terminating condition.
+      if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I))
+        if (Op->isDisjoint() && blockNeedsPredication(BB))
+          return false;
+
       // We handle calls that:
       //   * Are debug info intrinsics.
       //   * Have a mapping to an IR intrinsic.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
new file mode 100644
index 00000000000000..20a6732e340b78
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -0,0 +1,102 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@global = external global ptr addrspace(1), align 8
+
+; PR 81872 explains the issue.
+
+; If we vectorize, we have a miscompile where the array IV, and thereby the value stored
+; in (arr[99], arr[98]), is calculated incorrectly, since the disjoint or was only disjoint
+; because of dominating conditions. Dropping the disjoint flag to avoid poison still
+; changes the behaviour, since the or is then no longer equivalent to the add.
+; Function Attrs: uwtable
+define void @test(ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) %arg1) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  bb5:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 16
+; CHECK-NEXT:    br label [[BB8:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[PHI9:%.*]], 1
+; CHECK-NEXT:    [[ICMP7:%.*]] = icmp sgt i32 [[PHI9]], -2
+; CHECK-NEXT:    br i1 [[ICMP7]], label [[BB10:%.*]], label [[BB8]]
+; CHECK:       bb8:
+; CHECK-NEXT:    [[PHI9]] = phi i32 [ -60516, [[BB5:%.*]] ], [ [[ADD]], [[BB6:%.*]] ]
+; CHECK-NEXT:    br label [[BB15:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[GETELEMENTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 96
+; CHECK-NEXT:    [[LOAD12:%.*]] = load i64, ptr addrspace(1) [[GETELEMENTPTR11]], align 8, !noundef [[META0:![0-9]+]]
+; CHECK-NEXT:    [[LOAD13:%.*]] = load ptr addrspace(1), ptr @global, align 8, !invariant.load [[META0]], !nonnull [[META0]], !dereferenceable_or_null !1, !align [[META2:![0-9]+]]
+; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[LOAD13]], i64 848
+; CHECK-NEXT:    store i64 [[LOAD12]], ptr addrspace(1) [[GETELEMENTPTR14]], align 8
+; CHECK-NEXT:    ret void
+; CHECK:       bb15:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 99, [[BB8]] ], [ [[IV_NEXT:%.*]], [[BB20:%.*]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[IV]], 1
+; CHECK-NEXT:    [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0
+; CHECK-NEXT:    br i1 [[ICMP17]], label [[BB18:%.*]], label [[BB20]], !prof [[PROF3:![0-9]+]]
+; CHECK:       bb18:
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i64 [[IV]], 1
+; CHECK-NEXT:    [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[GETELEMENTPTR]], i64 [[OR]]
+; CHECK-NEXT:    store i64 1, ptr addrspace(1) [[GETELEMENTPTR19]], align 8
+; CHECK-NEXT:    br label [[BB20]]
+; CHECK:       bb20:
+; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; CHECK-NEXT:    [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90
+; CHECK-NEXT:    br i1 [[ICMP22]], label [[BB6]], label [[BB15]], !prof [[PROF4:![0-9]+]]
+;
+bb5:
+  %getelementptr = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 16
+  br label %bb8
+
+bb6:                                              ; preds = %bb20
+  %add = add nsw i32 %phi9, 1
+  %icmp7 = icmp sgt i32 %phi9, -2
+  br i1 %icmp7, label %bb10, label %bb8
+
+bb8:                                              ; preds = %bb6, %bb5
+  %phi9 = phi i32 [ -60516, %bb5 ], [ %add, %bb6 ]
+  br label %bb15
+
+bb10:                                             ; preds = %bb6
+  %getelementptr11 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 96
+  %load12 = load i64, ptr addrspace(1) %getelementptr11, align 8,  !noundef !4
+  %load13 = load ptr addrspace(1), ptr @global, align 8, !invariant.load !4, !nonnull !4, !dereferenceable_or_null !16, !align !17
+  %getelementptr14 = getelementptr inbounds i8, ptr addrspace(1) %load13, i64 848
+  store i64 %load12, ptr addrspace(1) %getelementptr14, align 8
+  ret void
+
+bb15:                                             ; preds = %bb20, %bb8
+  %iv = phi i64 [ 99, %bb8 ], [ %iv.next, %bb20 ]
+  %and = and i64 %iv, 1
+  %icmp17 = icmp eq i64 %and, 0
+  br i1 %icmp17, label %bb18, label %bb20, !prof !21
+
+bb18:                                             ; preds = %bb15
+  %or = or disjoint i64 %iv, 1
+  %getelementptr19 = getelementptr inbounds i64, ptr addrspace(1) %getelementptr, i64 %or
+  store i64 1, ptr addrspace(1) %getelementptr19, align 8
+  br label %bb20
+
+bb20:                                             ; preds = %bb18, %bb15
+  %iv.next = add nsw i64 %iv, -1
+  %icmp22 = icmp eq i64 %iv.next, 90
+  br i1 %icmp22, label %bb6, label %bb15, !prof !22
+}
+
+attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" }
+
+!4 = !{}
+!10 = !{i32 1}
+!16 = !{i64 864}
+!17 = !{i64 8}
+!21 = !{!"branch_weights", i32 1, i32 1}
+!22 = !{!"branch_weights", i32 1, i32 95}
+;.
+; CHECK: [[META0]] = !{}
+; CHECK: [[META2]] = !{i64 8}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 95}
+;.

``````````

</details>


https://github.com/llvm/llvm-project/pull/81922