[llvm] LV PR 81872 (PR #81922)

Thu Feb 15 13:26:23 PST 2024

https://github.com/annamthomas created https://github.com/llvm/llvm-project/pull/81922

- Precommit test for issue in pr81872
- [LoopVectorize] Fix for miscompile with disjoint or


>From 7977537b05e7812277749293a25f27fbe6b67a40 Mon Sep 17 00:00:00 2001
From: Anna Thomas <anna at azul.com>
Date: Thu, 15 Feb 2024 16:02:53 -0500
Subject: [PATCH 1/2] Precommit test for issue in pr81872

---
 .../Transforms/LoopVectorize/X86/pr81872.ll   | 137 ++++++++++++++++++
 1 file changed, 137 insertions(+)
 create mode 100644 llvm/test/Transforms/LoopVectorize/X86/pr81872.ll

diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
new file mode 100644
index 00000000000000..1b21b7dbae686c
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -0,0 +1,137 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 4
+; RUN: opt -S -passes=loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at global = external global ptr addrspace(1), align 8
+
+; PR 81872 explains the issue. 
+
+; FIXME: Miscompile where array IV and thereby value stored in (arr[99],
+; arr[98]) is calculated incorrectly since disjoint or was only disjoint because
+; of dominating conditions. Dropping the disjoint to avoid poison still changes
+; the behaviour since now the or is not longer equivalent to the add. 
+; Function Attrs: uwtable
+define void @test(ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) %arg1) #0 {
+; CHECK-LABEL: define void @test(
+; CHECK-SAME: ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) [[ARG1:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  bb5:
+; CHECK-NEXT:    [[GETELEMENTPTR:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 16
+; CHECK-NEXT:    br label [[BB8:%.*]]
+; CHECK:       bb6:
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[PHI9:%.*]], 1
+; CHECK-NEXT:    [[ICMP7:%.*]] = icmp sgt i32 [[PHI9]], -2
+; CHECK-NEXT:    br i1 [[ICMP7]], label [[BB10:%.*]], label [[BB8]]
+; CHECK:       bb8:
+; CHECK-NEXT:    [[PHI9]] = phi i32 [ -60516, [[BB5:%.*]] ], [ [[ADD]], [[BB6:%.*]] ]
+; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF0:![0-9]+]]
+; CHECK:       vector.ph:
+; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
+; CHECK:       vector.body:
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 99, i64 98, i64 97, i64 96>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 99, [[INDEX]]
+; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
+; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
+; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
+; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
+; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 8, i64 8, i64 8, i64 8>
+; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
+; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
+; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
+; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP0]], 1
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr addrspace(1) [[GETELEMENTPTR]], i64 [[TMP5]]
+; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr addrspace(1) [[TMP6]], i32 0
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr addrspace(1) [[TMP7]], i32 -3
+; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p1(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr addrspace(1) [[TMP8]], i32 8, <4 x i1> [[REVERSE]])
+; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
+; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
+; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
+; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
+; CHECK:       middle.block:
+; CHECK-NEXT:    br i1 true, label [[BB6]], label [[SCALAR_PH]]
+; CHECK:       scalar.ph:
+; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB8]] ]
+; CHECK-NEXT:    br label [[BB15:%.*]]
+; CHECK:       bb10:
+; CHECK-NEXT:    [[GETELEMENTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 96
+; CHECK-NEXT:    [[LOAD12:%.*]] = load i64, ptr addrspace(1) [[GETELEMENTPTR11]], align 8, !noundef [[META5:![0-9]+]]
+; CHECK-NEXT:    [[LOAD13:%.*]] = load ptr addrspace(1), ptr @global, align 8, !invariant.load [[META5]], !nonnull [[META5]], !dereferenceable_or_null !6, !align [[META7:![0-9]+]]
+; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[LOAD13]], i64 848
+; CHECK-NEXT:    store i64 [[LOAD12]], ptr addrspace(1) [[GETELEMENTPTR14]], align 8
+; CHECK-NEXT:    ret void
+; CHECK:       bb15:
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[BB20:%.*]] ]
+; CHECK-NEXT:    [[AND:%.*]] = and i64 [[IV]], 1
+; CHECK-NEXT:    [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0
+; CHECK-NEXT:    br i1 [[ICMP17]], label [[BB18:%.*]], label [[BB20]], !prof [[PROF8:![0-9]+]]
+; CHECK:       bb18:
+; CHECK-NEXT:    [[OR:%.*]] = or disjoint i64 [[IV]], 1
+; CHECK-NEXT:    [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[GETELEMENTPTR]], i64 [[OR]]
+; CHECK-NEXT:    store i64 1, ptr addrspace(1) [[GETELEMENTPTR19]], align 8
+; CHECK-NEXT:    br label [[BB20]]
+; CHECK:       bb20:
+; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
+; CHECK-NEXT:    [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90
+; CHECK-NEXT:    br i1 [[ICMP22]], label [[BB6]], label [[BB15]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
+;
+bb5:
+  %getelementptr = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 16
+  br label %bb8
+
+bb6:                                              ; preds = %bb20
+  %add = add nsw i32 %phi9, 1
+  %icmp7 = icmp sgt i32 %phi9, -2
+  br i1 %icmp7, label %bb10, label %bb8
+
+bb8:                                              ; preds = %bb6, %bb5
+  %phi9 = phi i32 [ -60516, %bb5 ], [ %add, %bb6 ]
+  br label %bb15
+
+bb10:                                             ; preds = %bb6
+  %getelementptr11 = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 96
+  %load12 = load i64, ptr addrspace(1) %getelementptr11, align 8,  !noundef !4
+  %load13 = load ptr addrspace(1), ptr @global, align 8, !invariant.load !4, !nonnull !4, !dereferenceable_or_null !16, !align !17
+  %getelementptr14 = getelementptr inbounds i8, ptr addrspace(1) %load13, i64 848
+  store i64 %load12, ptr addrspace(1) %getelementptr14, align 8
+  ret void
+
+bb15:                                             ; preds = %bb20, %bb8
+  %iv = phi i64 [ 99, %bb8 ], [ %iv.next, %bb20 ]
+  %and = and i64 %iv, 1
+  %icmp17 = icmp eq i64 %and, 0
+  br i1 %icmp17, label %bb18, label %bb20, !prof !21
+
+bb18:                                             ; preds = %bb15
+  %or = or disjoint i64 %iv, 1
+  %getelementptr19 = getelementptr inbounds i64, ptr addrspace(1) %getelementptr, i64 %or
+  store i64 1, ptr addrspace(1) %getelementptr19, align 8
+  br label %bb20
+
+bb20:                                             ; preds = %bb18, %bb15
+  %iv.next = add nsw i64 %iv, -1
+  %icmp22 = icmp eq i64 %iv.next, 90
+  br i1 %icmp22, label %bb6, label %bb15, !prof !22
+}
+
+attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" }
+
+!4 = !{}
+!10 = !{i32 1}
+!16 = !{i64 864}
+!17 = !{i64 8}
+!21 = !{!"branch_weights", i32 1, i32 1}
+!22 = !{!"branch_weights", i32 1, i32 95}
+;.
+; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
+; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23}
+; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]}
+; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[META5]] = !{}
+; CHECK: [[META7]] = !{i64 8}
+; CHECK: [[PROF8]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]}
+;.

>From e446dbe9d84b82441706197783dc05c9eacf488d Mon Sep 17 00:00:00 2001
From: Anna Thomas <anna at azul.com>
Date: Thu, 15 Feb 2024 16:10:51 -0500
Subject: [PATCH 2/2] [LoopVectorize] Fix for miscompile with disjoint or

When disjoint or is disjoint because of dominating conditions, we should
not vectorize such loops. This is because vectorizing it causes us to no
longer have the original meaning of the instruction (it is no longer
disjoint, it becomes a regular or which is not an add).

Fixes pr81872.
---
 .../Vectorize/LoopVectorizationLegality.cpp   | 11 ++++
 .../Transforms/LoopVectorize/X86/pr81872.ll   | 59 ++++---------------
 2 files changed, 23 insertions(+), 47 deletions(-)

diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
index 37a356c43e29a4..f2744b5d66a26f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp
@@ -888,6 +888,17 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
         return false;
       } // end of PHI handling
 
+      // or-disjoint instruction which is predicated maybe disjoint because of a
+      // dominating condition. Avoid vectorizing in such cases, since dropping
+      // the disjoint (to avoid poison) changes the meaning of the instruction
+      // which was originally an add.
+      // TODO: We should check the terminating condition of all
+      // dominating blocks in the loop to see if Op0 is used for their
+      // terminating condition.
+      if (auto *Op = dyn_cast<PossiblyDisjointInst>(&I))
+        if (Op->isDisjoint() && blockNeedsPredication(BB))
+          return false;
+
       // We handle calls that:
       //   * Are debug info intrinsics.
       //   * Have a mapping to an IR intrinsic.
diff --git a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
index 1b21b7dbae686c..20a6732e340b78 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/pr81872.ll
@@ -5,12 +5,12 @@ target triple = "x86_64-unknown-linux-gnu"
 
 @global = external global ptr addrspace(1), align 8
 
-; PR 81872 explains the issue. 
+; PR 81872 explains the issue.
 
-; FIXME: Miscompile where array IV and thereby value stored in (arr[99],
+; If we vectorize, we have a miscompile where array IV and thereby value stored in (arr[99],
 ; arr[98]) is calculated incorrectly since disjoint or was only disjoint because
 ; of dominating conditions. Dropping the disjoint to avoid poison still changes
-; the behaviour since now the or is not longer equivalent to the add. 
+; the behaviour since now the or is not longer equivalent to the add.
 ; Function Attrs: uwtable
 define void @test(ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) %arg1) #0 {
 ; CHECK-LABEL: define void @test(
@@ -24,48 +24,19 @@ define void @test(ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) %
 ; CHECK-NEXT:    br i1 [[ICMP7]], label [[BB10:%.*]], label [[BB8]]
 ; CHECK:       bb8:
 ; CHECK-NEXT:    [[PHI9]] = phi i32 [ -60516, [[BB5:%.*]] ], [ [[ADD]], [[BB6:%.*]] ]
-; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]], !prof [[PROF0:![0-9]+]]
-; CHECK:       vector.ph:
-; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
-; CHECK:       vector.body:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 99, i64 98, i64 97, i64 96>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i64 99, [[INDEX]]
-; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
-; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> poison, i64 [[INDEX]], i64 0
-; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> poison, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[VEC_IV:%.*]] = add <4 x i64> [[BROADCAST_SPLAT]], <i64 0, i64 1, i64 2, i64 3>
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i64> [[VEC_IV]], <i64 8, i64 8, i64 8, i64 8>
-; CHECK-NEXT:    [[TMP2:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
-; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i64> [[TMP2]], zeroinitializer
-; CHECK-NEXT:    [[TMP4:%.*]] = select <4 x i1> [[TMP1]], <4 x i1> [[TMP3]], <4 x i1> zeroinitializer
-; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[TMP0]], 1
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i64, ptr addrspace(1) [[GETELEMENTPTR]], i64 [[TMP5]]
-; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i64, ptr addrspace(1) [[TMP6]], i32 0
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr i64, ptr addrspace(1) [[TMP7]], i32 -3
-; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i1> [[TMP4]], <4 x i1> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p1(<4 x i64> <i64 1, i64 1, i64 1, i64 1>, ptr addrspace(1) [[TMP8]], i32 8, <4 x i1> [[REVERSE]])
-; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
-; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 -4, i64 -4, i64 -4, i64 -4>
-; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], 12
-; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF1:![0-9]+]], !llvm.loop [[LOOP2:![0-9]+]]
-; CHECK:       middle.block:
-; CHECK-NEXT:    br i1 true, label [[BB6]], label [[SCALAR_PH]]
-; CHECK:       scalar.ph:
-; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 87, [[MIDDLE_BLOCK]] ], [ 99, [[BB8]] ]
 ; CHECK-NEXT:    br label [[BB15:%.*]]
 ; CHECK:       bb10:
 ; CHECK-NEXT:    [[GETELEMENTPTR11:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[ARG1]], i64 96
-; CHECK-NEXT:    [[LOAD12:%.*]] = load i64, ptr addrspace(1) [[GETELEMENTPTR11]], align 8, !noundef [[META5:![0-9]+]]
-; CHECK-NEXT:    [[LOAD13:%.*]] = load ptr addrspace(1), ptr @global, align 8, !invariant.load [[META5]], !nonnull [[META5]], !dereferenceable_or_null !6, !align [[META7:![0-9]+]]
+; CHECK-NEXT:    [[LOAD12:%.*]] = load i64, ptr addrspace(1) [[GETELEMENTPTR11]], align 8, !noundef [[META0:![0-9]+]]
+; CHECK-NEXT:    [[LOAD13:%.*]] = load ptr addrspace(1), ptr @global, align 8, !invariant.load [[META0]], !nonnull [[META0]], !dereferenceable_or_null !1, !align [[META2:![0-9]+]]
 ; CHECK-NEXT:    [[GETELEMENTPTR14:%.*]] = getelementptr inbounds i8, ptr addrspace(1) [[LOAD13]], i64 848
 ; CHECK-NEXT:    store i64 [[LOAD12]], ptr addrspace(1) [[GETELEMENTPTR14]], align 8
 ; CHECK-NEXT:    ret void
 ; CHECK:       bb15:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[BB20:%.*]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 99, [[BB8]] ], [ [[IV_NEXT:%.*]], [[BB20:%.*]] ]
 ; CHECK-NEXT:    [[AND:%.*]] = and i64 [[IV]], 1
 ; CHECK-NEXT:    [[ICMP17:%.*]] = icmp eq i64 [[AND]], 0
-; CHECK-NEXT:    br i1 [[ICMP17]], label [[BB18:%.*]], label [[BB20]], !prof [[PROF8:![0-9]+]]
+; CHECK-NEXT:    br i1 [[ICMP17]], label [[BB18:%.*]], label [[BB20]], !prof [[PROF3:![0-9]+]]
 ; CHECK:       bb18:
 ; CHECK-NEXT:    [[OR:%.*]] = or disjoint i64 [[IV]], 1
 ; CHECK-NEXT:    [[GETELEMENTPTR19:%.*]] = getelementptr inbounds i64, ptr addrspace(1) [[GETELEMENTPTR]], i64 [[OR]]
@@ -74,7 +45,7 @@ define void @test(ptr addrspace(1) noundef align 8 dereferenceable_or_null(16) %
 ; CHECK:       bb20:
 ; CHECK-NEXT:    [[IV_NEXT]] = add nsw i64 [[IV]], -1
 ; CHECK-NEXT:    [[ICMP22:%.*]] = icmp eq i64 [[IV_NEXT]], 90
-; CHECK-NEXT:    br i1 [[ICMP22]], label [[BB6]], label [[BB15]], !prof [[PROF9:![0-9]+]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT:    br i1 [[ICMP22]], label [[BB6]], label [[BB15]], !prof [[PROF4:![0-9]+]]
 ;
 bb5:
   %getelementptr = getelementptr inbounds i8, ptr addrspace(1) %arg1, i64 16
@@ -124,14 +95,8 @@ attributes #0 = {"target-cpu"="haswell" "target-features"="+avx2" }
 !21 = !{!"branch_weights", i32 1, i32 1}
 !22 = !{!"branch_weights", i32 1, i32 95}
 ;.
-; CHECK: [[PROF0]] = !{!"branch_weights", i32 1, i32 127}
-; CHECK: [[PROF1]] = !{!"branch_weights", i32 1, i32 23}
-; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]], [[META4:![0-9]+]]}
-; CHECK: [[META3]] = !{!"llvm.loop.isvectorized", i32 1}
-; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[META5]] = !{}
-; CHECK: [[META7]] = !{i64 8}
-; CHECK: [[PROF8]] = !{!"branch_weights", i32 1, i32 1}
-; CHECK: [[PROF9]] = !{!"branch_weights", i32 0, i32 0}
-; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META4]], [[META3]]}
+; CHECK: [[META0]] = !{}
+; CHECK: [[META2]] = !{i64 8}
+; CHECK: [[PROF3]] = !{!"branch_weights", i32 1, i32 1}
+; CHECK: [[PROF4]] = !{!"branch_weights", i32 1, i32 95}
 ;.