[clang] [llvm] [LoopUtils] Fix metadata generated by makeFollowupLoopID (PR #131985)

Ryotaro Kasuga via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 26 01:35:23 PDT 2025


https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/131985

>From 889f40c5570af8a02e301c2bf3c6382f69210140 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 17 Mar 2025 11:24:47 +0000
Subject: [PATCH 1/3] [LoopUtils] Fix metadata generated by makeFollowupLoopID

When multiple pragmas for loop transformations are specified, such as:

```c
#pragma clang loop vectorize(enable)
#pragma clang loop unroll_count(8)
for (...) {
}
```

The generated metadata would look like this:

```
!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
!2 = !{!"llvm.loop.vectorize.followup_all", !3}
!3 = distinct !{!3, !4, !5}
!4 = !{!"llvm.loop.isvectorized"}
!5 = !{!"llvm.loop.unroll.count", i32 8}
```

For a loop with `!0` as its LoopID, the new LoopID after vectorization
should look like the following, so that we know this loop has already been
vectorized and should be unrolled with the specified count:

```
!6 = distinct !{!6, !4, !5}
```

However, the current implementation creates a new LoopID like this:

```
!7 = distinct !{!7, !3}
```

Therefore, subsequent passes like LoopUnroll fail to recognize the
attributes of this loop correctly.

This patch fixes `makeFollowupLoopID`, which creates a new LoopID after
each transformation. If the follow-up metadata (`!3` in the above case)
is a LoopID, the new LoopID will contain its operands (`!4` and `!5`)
instead of the metadata itself.
---
 llvm/lib/Transforms/Utils/LoopUtils.cpp       |  29 +++++
 .../LoopVectorize/make-followup-loop-id.ll    | 102 ++++++++++++++----
 2 files changed, 111 insertions(+), 20 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 84c08556f8a25..4a6105add953f 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -317,6 +317,35 @@ std::optional<MDNode *> llvm::makeFollowupLoopID(
 
     HasAnyFollowup = true;
     for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
+      // The followup metadata typically forms as follows:
+      //
+      //   !0 = distinct !{!0, !1, !2}
+      //   !1 = !{!"llvm.loop.distribute.enable", i1 true}
+      //   !2 = !{!"llvm.loop.distribute.followup_all", !3}
+      //   !3 = distinct !{!3, !4}
+      //   !4 = !{!"llvm.loop.vectorize.enable", i1 true}
+      //
+      // If we push Option (!3 in this case) in MDs, the new metadata looks
+      // something like:
+      //
+      //   !5 = distinct !{!5, !3}
+      //
+      // This doesn't contain !4, so the vectorization pass doesn't recognize
+      // this loop as vectorization enabled. To make the new metadata contain !4
+      // instead of !3, traverse all of Option's operands and push them into
+      // MDs if Option seems to be a LoopID.
+      if (auto *MDN = dyn_cast<MDNode>(Option)) {
+        // TODO: Is there a proper way to detect LoopID?
+        if (MDN->getNumOperands() > 1 && MDN->getOperand(0) == MDN) {
+          for (const MDOperand &NestedOption : drop_begin(MDN->operands())) {
+            MDs.push_back(NestedOption.get());
+            Changed = true;
+          }
+          continue;
+        }
+      }
+
+      // If Option does't seem to be a LoopID, push it as it is.
       MDs.push_back(Option.get());
       Changed = true;
     }
diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
index fa5c206547a07..41f508e0a7641 100644
--- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
+++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
@@ -11,10 +11,6 @@
 ;     a[i] *= x;
 ;   }
 ; }
-;
-; FIXME: Currently unrolling is not applied. This is because the new Loop ID
-; created after vectorization does not directly contain unroll metadata.
-; Unexpected nests have been created.
 define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-LABEL: define void @f(
 ; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) {
@@ -25,14 +21,47 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]]
-; CHECK-NEXT:    store <4 x float> [[TMP15]], ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    store <4 x float> [[TMP15]], ptr [[TMP14]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]]
+; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]]
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]]
+; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]]
+; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[TMP16]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]]
+; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]]
+; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]]
+; CHECK-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]]
+; CHECK-NEXT:    store <4 x float> [[TMP9]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]]
+; CHECK-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]]
+; CHECK-NEXT:    store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]]
+; CHECK-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]]
+; CHECK-NEXT:    store <4 x float> [[TMP13]], ptr [[TMP12]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]]
+; CHECK-NEXT:    [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]]
+; CHECK-NEXT:    store <4 x float> [[TMP18]], ptr [[TMP17]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -41,14 +70,49 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X]], [[LOAD]]
 ; CHECK-NEXT:    store float [[MUL]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
+; CHECK-NEXT:    [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]]
+; CHECK-NEXT:    store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT:    [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]]
+; CHECK-NEXT:    store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT:    [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]]
+; CHECK-NEXT:    store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]]
+; CHECK-NEXT:    [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]]
+; CHECK-NEXT:    store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]]
+; CHECK-NEXT:    [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]]
+; CHECK-NEXT:    store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]]
+; CHECK-NEXT:    [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]]
+; CHECK-NEXT:    store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]]
+; CHECK-NEXT:    [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]]
+; CHECK-NEXT:    store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024
+; CHECK-NEXT:    br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[EXIT_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:
@@ -78,10 +142,8 @@ exit:
 !4 = !{!"llvm.loop.isvectorized"}
 !5 = !{!"llvm.loop.unroll.count", i32 8}
 ;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]}
-; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
-; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"}
-; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8}
-; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
 ;.

>From b1d69e95eb7a75d9809cf77c11367f7bda66ff34 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 25 Mar 2025 06:43:31 +0000
Subject: [PATCH 2/3] Revert "[LoopUtils] Fix metadata generated by
 makeFollowupLoopID"

This reverts commit 889f40c5570af8a02e301c2bf3c6382f69210140.
---
 llvm/lib/Transforms/Utils/LoopUtils.cpp       |  29 -----
 .../LoopVectorize/make-followup-loop-id.ll    | 102 ++++--------------
 2 files changed, 20 insertions(+), 111 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 4a6105add953f..84c08556f8a25 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -317,35 +317,6 @@ std::optional<MDNode *> llvm::makeFollowupLoopID(
 
     HasAnyFollowup = true;
     for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
-      // The followup metadata typically forms as follows:
-      //
-      //   !0 = distinct !{!0, !1, !2}
-      //   !1 = !{!"llvm.loop.distribute.enable", i1 true}
-      //   !2 = !{!"llvm.loop.distribute.followup_all", !3}
-      //   !3 = distinct !{!3, !4}
-      //   !4 = !{!"llvm.loop.vectorize.enable", i1 true}
-      //
-      // If we push Option (!3 in this case) in MDs, the new metadata looks
-      // something like:
-      //
-      //   !5 = distinct !{!5, !3}
-      //
-      // This doesn't contain !4, so the vectorization pass doesn't recognize
-      // this loop as vectorization enabled. To make the new metadata contain !4
-      // instead of !3, traverse all of Option's operands and push them into
-      // MDs if Option seems to be a LoopID.
-      if (auto *MDN = dyn_cast<MDNode>(Option)) {
-        // TODO: Is there a proper way to detect LoopID?
-        if (MDN->getNumOperands() > 1 && MDN->getOperand(0) == MDN) {
-          for (const MDOperand &NestedOption : drop_begin(MDN->operands())) {
-            MDs.push_back(NestedOption.get());
-            Changed = true;
-          }
-          continue;
-        }
-      }
-
-      // If Option does't seem to be a LoopID, push it as it is.
       MDs.push_back(Option.get());
       Changed = true;
     }
diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
index 41f508e0a7641..fa5c206547a07 100644
--- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
+++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
@@ -11,6 +11,10 @@
 ;     a[i] *= x;
 ;   }
 ; }
+;
+; FIXME: Currently unrolling is not applied. This is because the new Loop ID
+; created after vectorization does not directly contain unroll metadata.
+; Unexpected nests have been created.
 define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-LABEL: define void @f(
 ; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) {
@@ -21,47 +25,14 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT:    [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]]
-; CHECK-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0
+; CHECK-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]]
-; CHECK-NEXT:    store <4 x float> [[TMP15]], ptr [[TMP14]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]]
-; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]]
-; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8
-; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]]
-; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]]
-; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[TMP16]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]]
-; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]]
-; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[TMP6]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16
-; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]]
-; CHECK-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT:    [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]]
-; CHECK-NEXT:    store <4 x float> [[TMP9]], ptr [[TMP8]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20
-; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]]
-; CHECK-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
-; CHECK-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]]
-; CHECK-NEXT:    store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24
-; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]]
-; CHECK-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4
-; CHECK-NEXT:    [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]]
-; CHECK-NEXT:    store <4 x float> [[TMP13]], ptr [[TMP12]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]]
-; CHECK-NEXT:    [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
-; CHECK-NEXT:    [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]]
-; CHECK-NEXT:    store <4 x float> [[TMP18]], ptr [[TMP17]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32
+; CHECK-NEXT:    store <4 x float> [[TMP15]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -70,49 +41,14 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X]], [[LOAD]]
 ; CHECK-NEXT:    store float [[MUL]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
-; CHECK-NEXT:    [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]]
-; CHECK-NEXT:    store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
-; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]]
-; CHECK-NEXT:    [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]]
-; CHECK-NEXT:    store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
-; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]]
-; CHECK-NEXT:    [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]]
-; CHECK-NEXT:    store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT:    [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]]
-; CHECK-NEXT:    [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT:    [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]]
-; CHECK-NEXT:    store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT:    [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
-; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]]
-; CHECK-NEXT:    [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT:    [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]]
-; CHECK-NEXT:    store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT:    [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]]
-; CHECK-NEXT:    [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT:    [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]]
-; CHECK-NEXT:    store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT:    [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
-; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]]
-; CHECK-NEXT:    [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT:    [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]]
-; CHECK-NEXT:    store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
-; CHECK-NEXT:    [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024
-; CHECK-NEXT:    br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
+; CHECK-NEXT:    br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
 ; CHECK:       [[EXIT_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:
@@ -142,8 +78,10 @@ exit:
 !4 = !{!"llvm.loop.isvectorized"}
 !5 = !{!"llvm.loop.unroll.count", i32 8}
 ;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
 ;.

>From ce8febb0befe41694b9d83c14dcfb831a82489ff Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 26 Mar 2025 07:29:18 +0000
Subject: [PATCH 3/3] [clang][CodeGen] Generate follow-up metadata for loops in
 correct format

When pragmas for loop transformations are specified, follow-up metadata
for loops is generated after each transformation. On the LLVM side,
follow-up metadata is expected to be a list of properties, such as the
following:

```
!followup = !{!"llvm.loop.vectorize.followup_all", !mp, !isvectorized}
!mp = !{!"llvm.loop.mustprogress"}
!isvectorized = !{!"llvm.loop.isvectorized"}
```

However, on the clang side, the generated metadata contains an MDNode
that has those properties, as shown below:

```
!followup = !{!"llvm.loop.vectorize.followup_all", !loop_id}
!loop_id = distinct !{!loop_id, !mp, !isvectorized}
!mp = !{!"llvm.loop.mustprogress"}
!isvectorized = !{!"llvm.loop.isvectorized"}
```

According to the LLVM documentation, the LLVM side is correct (ref:
https://llvm.org/docs/TransformMetadata.html#transformation-metadata-structure).
Due to this inconsistency, follow-up metadata was not interpreted
correctly; e.g., only one transformation was applied when multiple
pragmas were used.

This patch fixes the clang side to emit follow-up metadata in the correct format.
---
 clang/lib/CodeGen/CGLoopInfo.cpp              | 133 ++++++++----------
 clang/lib/CodeGen/CGLoopInfo.h                |  43 +++---
 .../test/CodeGenCXX/pragma-followup_inner.cpp |   9 +-
 .../test/CodeGenCXX/pragma-followup_outer.cpp |  12 +-
 clang/test/CodeGenCXX/pragma-loop.cpp         |  25 +---
 .../LoopVectorize/make-followup-loop-id.ll    | 108 +++++++++++---
 6 files changed, 180 insertions(+), 150 deletions(-)

diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp
index 448571221ef81..2b7d7881ab990 100644
--- a/clang/lib/CodeGen/CGLoopInfo.cpp
+++ b/clang/lib/CodeGen/CGLoopInfo.cpp
@@ -22,20 +22,20 @@ using namespace clang::CodeGen;
 using namespace llvm;
 
 MDNode *
-LoopInfo::createLoopPropertiesMetadata(ArrayRef<Metadata *> LoopProperties) {
+LoopInfo::createFollowupMetadata(const char *FollowupName,
+                                 ArrayRef<llvm::Metadata *> LoopProperties) {
   LLVMContext &Ctx = Header->getContext();
-  SmallVector<Metadata *, 4> NewLoopProperties;
-  NewLoopProperties.push_back(nullptr);
-  NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end());
 
-  MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
-  LoopID->replaceOperandWith(0, LoopID);
-  return LoopID;
+  SmallVector<Metadata *, 4> Args;
+  Args.push_back(MDString::get(Ctx, FollowupName));
+  Args.append(LoopProperties.begin(), LoopProperties.end());
+  return MDNode::get(Ctx, Args);
 }
 
-MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
-                                           ArrayRef<Metadata *> LoopProperties,
-                                           bool &HasUserTransforms) {
+SmallVector<Metadata *, 4>
+LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
+                                   ArrayRef<Metadata *> LoopProperties,
+                                   bool &HasUserTransforms) {
   LLVMContext &Ctx = Header->getContext();
 
   std::optional<bool> Enabled;
@@ -44,23 +44,19 @@ MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
   else if (Attrs.PipelineInitiationInterval != 0)
     Enabled = true;
 
+  SmallVector<Metadata *, 4> Args;
+  Args.append(LoopProperties.begin(), LoopProperties.end());
+
   if (Enabled != true) {
-    SmallVector<Metadata *, 4> NewLoopProperties;
     if (Enabled == false) {
-      NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end());
-      NewLoopProperties.push_back(
+      Args.push_back(
           MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.disable"),
                             ConstantAsMetadata::get(ConstantInt::get(
                                 llvm::Type::getInt1Ty(Ctx), 1))}));
-      LoopProperties = NewLoopProperties;
     }
-    return createLoopPropertiesMetadata(LoopProperties);
+    return Args;
   }
 
-  SmallVector<Metadata *, 4> Args;
-  Args.push_back(nullptr);
-  Args.append(LoopProperties.begin(), LoopProperties.end());
-
   if (Attrs.PipelineInitiationInterval > 0) {
     Metadata *Vals[] = {
         MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"),
@@ -71,13 +67,11 @@ MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
 
   // No follow-up: This is the last transformation.
 
-  MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
-  LoopID->replaceOperandWith(0, LoopID);
   HasUserTransforms = true;
-  return LoopID;
+  return Args;
 }
 
-MDNode *
+SmallVector<Metadata *, 4>
 LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs,
                                       ArrayRef<Metadata *> LoopProperties,
                                       bool &HasUserTransforms) {
@@ -108,11 +102,10 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs,
       MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable")));
 
   bool FollowupHasTransforms = false;
-  MDNode *Followup = createPipeliningMetadata(Attrs, FollowupLoopProperties,
-                                              FollowupHasTransforms);
+  SmallVector<Metadata *, 4> Followup = createPipeliningMetadata(
+      Attrs, FollowupLoopProperties, FollowupHasTransforms);
 
   SmallVector<Metadata *, 4> Args;
-  Args.push_back(nullptr);
   Args.append(LoopProperties.begin(), LoopProperties.end());
 
   // Setting unroll.count
@@ -130,16 +123,14 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs,
   }
 
   if (FollowupHasTransforms)
-    Args.push_back(MDNode::get(
-        Ctx, {MDString::get(Ctx, "llvm.loop.unroll.followup_all"), Followup}));
+    Args.push_back(
+        createFollowupMetadata("llvm.loop.unroll.followup_all", Followup));
 
-  MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
-  LoopID->replaceOperandWith(0, LoopID);
   HasUserTransforms = true;
-  return LoopID;
+  return Args;
 }
 
-MDNode *
+SmallVector<Metadata *, 4>
 LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs,
                                      ArrayRef<Metadata *> LoopProperties,
                                      bool &HasUserTransforms) {
@@ -170,11 +161,10 @@ LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs,
       MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable")));
 
   bool FollowupHasTransforms = false;
-  MDNode *Followup = createPartialUnrollMetadata(Attrs, FollowupLoopProperties,
-                                                 FollowupHasTransforms);
+  SmallVector<Metadata *, 4> Followup = createPartialUnrollMetadata(
+      Attrs, FollowupLoopProperties, FollowupHasTransforms);
 
   SmallVector<Metadata *, 4> Args;
-  Args.push_back(nullptr);
   Args.append(LoopProperties.begin(), LoopProperties.end());
 
   // Setting unroll_and_jam.count
@@ -192,22 +182,18 @@ LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs,
   }
 
   if (FollowupHasTransforms)
-    Args.push_back(MDNode::get(
-        Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_outer"),
-              Followup}));
+    Args.push_back(createFollowupMetadata(
+        "llvm.loop.unroll_and_jam.followup_outer", Followup));
 
-  if (UnrollAndJamInnerFollowup)
-    Args.push_back(MDNode::get(
-        Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_inner"),
-              UnrollAndJamInnerFollowup}));
+  if (UnrollAndJamInnerFollowup.has_value())
+    Args.push_back(createFollowupMetadata(
+        "llvm.loop.unroll_and_jam.followup_inner", *UnrollAndJamInnerFollowup));
 
-  MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
-  LoopID->replaceOperandWith(0, LoopID);
   HasUserTransforms = true;
-  return LoopID;
+  return Args;
 }
 
-MDNode *
+SmallVector<Metadata *, 4>
 LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
                                       ArrayRef<Metadata *> LoopProperties,
                                       bool &HasUserTransforms) {
@@ -244,11 +230,10 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
       MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized")));
 
   bool FollowupHasTransforms = false;
-  MDNode *Followup = createUnrollAndJamMetadata(Attrs, FollowupLoopProperties,
-                                                FollowupHasTransforms);
+  SmallVector<Metadata *, 4> Followup = createUnrollAndJamMetadata(
+      Attrs, FollowupLoopProperties, FollowupHasTransforms);
 
   SmallVector<Metadata *, 4> Args;
-  Args.push_back(nullptr);
   Args.append(LoopProperties.begin(), LoopProperties.end());
 
   // Setting vectorize.predicate when it has been specified and vectorization
@@ -315,17 +300,14 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
   }
 
   if (FollowupHasTransforms)
-    Args.push_back(MDNode::get(
-        Ctx,
-        {MDString::get(Ctx, "llvm.loop.vectorize.followup_all"), Followup}));
+    Args.push_back(
+        createFollowupMetadata("llvm.loop.vectorize.followup_all", Followup));
 
-  MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
-  LoopID->replaceOperandWith(0, LoopID);
   HasUserTransforms = true;
-  return LoopID;
+  return Args;
 }
 
-MDNode *
+SmallVector<Metadata *, 4>
 LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs,
                                        ArrayRef<Metadata *> LoopProperties,
                                        bool &HasUserTransforms) {
@@ -352,11 +334,10 @@ LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs,
   }
 
   bool FollowupHasTransforms = false;
-  MDNode *Followup =
+  SmallVector<Metadata *, 4> Followup =
       createLoopVectorizeMetadata(Attrs, LoopProperties, FollowupHasTransforms);
 
   SmallVector<Metadata *, 4> Args;
-  Args.push_back(nullptr);
   Args.append(LoopProperties.begin(), LoopProperties.end());
 
   Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"),
@@ -366,19 +347,17 @@ LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs,
   Args.push_back(MDNode::get(Ctx, Vals));
 
   if (FollowupHasTransforms)
-    Args.push_back(MDNode::get(
-        Ctx,
-        {MDString::get(Ctx, "llvm.loop.distribute.followup_all"), Followup}));
+    Args.push_back(
+        createFollowupMetadata("llvm.loop.distribute.followup_all", Followup));
 
-  MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
-  LoopID->replaceOperandWith(0, LoopID);
   HasUserTransforms = true;
-  return LoopID;
+  return Args;
 }
 
-MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs,
-                                           ArrayRef<Metadata *> LoopProperties,
-                                           bool &HasUserTransforms) {
+SmallVector<Metadata *, 4>
+LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs,
+                                   ArrayRef<Metadata *> LoopProperties,
+                                   bool &HasUserTransforms) {
   LLVMContext &Ctx = Header->getContext();
 
   std::optional<bool> Enabled;
@@ -400,20 +379,17 @@ MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs,
   }
 
   SmallVector<Metadata *, 4> Args;
-  Args.push_back(nullptr);
   Args.append(LoopProperties.begin(), LoopProperties.end());
   Args.push_back(MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full")));
 
   // No follow-up: there is no loop after full unrolling.
   // TODO: Warn if there are transformations after full unrolling.
 
-  MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
-  LoopID->replaceOperandWith(0, LoopID);
   HasUserTransforms = true;
-  return LoopID;
+  return Args;
 }
 
-MDNode *LoopInfo::createMetadata(
+SmallVector<Metadata *, 4> LoopInfo::createMetadata(
     const LoopAttributes &Attrs,
     llvm::ArrayRef<llvm::Metadata *> AdditionalLoopProperties,
     bool &HasUserTransforms) {
@@ -579,8 +555,8 @@ void LoopInfo::finish() {
             MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized")));
 
       bool InnerFollowupHasTransform = false;
-      MDNode *InnerFollowup = createMetadata(AfterJam, BeforeLoopProperties,
-                                             InnerFollowupHasTransform);
+      SmallVector<Metadata *, 4> InnerFollowup = createMetadata(
+          AfterJam, BeforeLoopProperties, InnerFollowupHasTransform);
       if (InnerFollowupHasTransform)
         Parent->UnrollAndJamInnerFollowup = InnerFollowup;
     }
@@ -589,7 +565,14 @@ void LoopInfo::finish() {
   }
 
   bool HasUserTransforms = false;
-  LoopID = createMetadata(CurLoopAttr, {}, HasUserTransforms);
+  SmallVector<Metadata *, 4> Properties =
+      createMetadata(CurLoopAttr, {}, HasUserTransforms);
+  SmallVector<Metadata *, 4> Args;
+  Args.push_back(nullptr);
+  Args.append(Properties.begin(), Properties.end());
+  LoopID = MDNode::getDistinct(Ctx, Args);
+  LoopID->replaceOperandWith(0, LoopID);
+
   TempLoopID->replaceAllUsesWith(LoopID);
 }
 
diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h
index 0fe33b2891306..3c57124f4137c 100644
--- a/clang/lib/CodeGen/CGLoopInfo.h
+++ b/clang/lib/CodeGen/CGLoopInfo.h
@@ -132,17 +132,19 @@ class LoopInfo {
   /// If this loop has unroll-and-jam metadata, this can be set by the inner
   /// loop's LoopInfo to set the llvm.loop.unroll_and_jam.followup_inner
   /// metadata.
-  llvm::MDNode *UnrollAndJamInnerFollowup = nullptr;
+  std::optional<llvm::SmallVector<llvm::Metadata *, 4>>
+      UnrollAndJamInnerFollowup;
 
-  /// Create a LoopID without any transformations.
+  /// Create a followup MDNode that has @p LoopProperties as its attributes.
   llvm::MDNode *
-  createLoopPropertiesMetadata(llvm::ArrayRef<llvm::Metadata *> LoopProperties);
+  createFollowupMetadata(const char *FollowupName,
+                         llvm::ArrayRef<llvm::Metadata *> LoopProperties);
 
-  /// Create a LoopID for transformations.
+  /// Create a metadata list for transformations.
   ///
   /// The methods call each other in case multiple transformations are applied
-  /// to a loop. The transformation first to be applied will use LoopID of the
-  /// next transformation in its followup attribute.
+  /// to a loop. The transformation first to be applied will use the metadata
+  /// list of the next transformation in its followup attribute.
   ///
   /// @param Attrs             The loop's transformations.
   /// @param LoopProperties    Non-transformation properties such as debug
@@ -152,36 +154,37 @@ class LoopInfo {
   /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes
   ///                          at least one transformation.
   ///
-  /// @return A LoopID (metadata node) that can be used for the llvm.loop
-  ///         annotation or followup-attribute.
+  /// @return A metadata list that can be used for the llvm.loop annotation or
+  ///         followup-attribute.
   /// @{
-  llvm::MDNode *
+  llvm::SmallVector<llvm::Metadata *, 4>
   createPipeliningMetadata(const LoopAttributes &Attrs,
                            llvm::ArrayRef<llvm::Metadata *> LoopProperties,
                            bool &HasUserTransforms);
-  llvm::MDNode *
+  llvm::SmallVector<llvm::Metadata *, 4>
   createPartialUnrollMetadata(const LoopAttributes &Attrs,
                               llvm::ArrayRef<llvm::Metadata *> LoopProperties,
                               bool &HasUserTransforms);
-  llvm::MDNode *
+  llvm::SmallVector<llvm::Metadata *, 4>
   createUnrollAndJamMetadata(const LoopAttributes &Attrs,
                              llvm::ArrayRef<llvm::Metadata *> LoopProperties,
                              bool &HasUserTransforms);
-  llvm::MDNode *
+  llvm::SmallVector<llvm::Metadata *, 4>
   createLoopVectorizeMetadata(const LoopAttributes &Attrs,
                               llvm::ArrayRef<llvm::Metadata *> LoopProperties,
                               bool &HasUserTransforms);
-  llvm::MDNode *
+  llvm::SmallVector<llvm::Metadata *, 4>
   createLoopDistributeMetadata(const LoopAttributes &Attrs,
                                llvm::ArrayRef<llvm::Metadata *> LoopProperties,
                                bool &HasUserTransforms);
-  llvm::MDNode *
+  llvm::SmallVector<llvm::Metadata *, 4>
   createFullUnrollMetadata(const LoopAttributes &Attrs,
                            llvm::ArrayRef<llvm::Metadata *> LoopProperties,
                            bool &HasUserTransforms);
+
   /// @}
 
-  /// Create a LoopID for this loop, including transformation-unspecific
+  /// Create a metadata list for this loop, including transformation-unspecific
   /// metadata such as debug location.
   ///
   /// @param Attrs             This loop's attributes and transformations.
@@ -191,11 +194,11 @@ class LoopInfo {
   /// @param HasUserTransforms [out] Set to true if the returned MDNode encodes
   ///                          at least one transformation.
   ///
-  /// @return A LoopID (metadata node) that can be used for the llvm.loop
-  ///         annotation.
-  llvm::MDNode *createMetadata(const LoopAttributes &Attrs,
-                               llvm::ArrayRef<llvm::Metadata *> LoopProperties,
-                               bool &HasUserTransforms);
+  /// @return A metadata list that can be used for the llvm.loop annotation.
+  llvm::SmallVector<llvm::Metadata *, 4>
+  createMetadata(const LoopAttributes &Attrs,
+                 llvm::ArrayRef<llvm::Metadata *> LoopProperties,
+                 bool &HasUserTransforms);
 };
 
 /// A stack of loop information corresponding to loop nesting levels.
diff --git a/clang/test/CodeGenCXX/pragma-followup_inner.cpp b/clang/test/CodeGenCXX/pragma-followup_inner.cpp
index bdcf8a9cbcc07..04f5656dcd638 100644
--- a/clang/test/CodeGenCXX/pragma-followup_inner.cpp
+++ b/clang/test/CodeGenCXX/pragma-followup_inner.cpp
@@ -23,20 +23,17 @@ extern "C" void followup_inner(int n, int *x) {
 // CHECK-DAG: ![[INNERLOOP_3]] = distinct !{![[INNERLOOP_3]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]}
 // CHECK-DAG: ![[PARALLEL_ACCESSES_4]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]}
 // CHECK-DAG: ![[DISTRIBUTE_5]] = !{!"llvm.loop.distribute.enable", i1 true}
-// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_7:[0-9]+]]}
 
-// CHECK-DAG: ![[LOOP_7]] = distinct !{![[LOOP_7]], ![[PARALLEL_ACCESSES_4]], ![[VECTORIZE_8:[0-9]+]]}
+// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6]] = !{!"llvm.loop.distribute.followup_all", ![[PARALLEL_ACCESSES_4]], ![[VECTORIZE_8:[0-9]+]]}
 // CHECK-DAG: ![[VECTORIZE_8]] = !{!"llvm.loop.vectorize.enable", i1 true}
 
 // CHECK-DAG: ![[OUTERLOOP_9]] = distinct !{![[OUTERLOOP_9]], [[MP:![0-9]+]], ![[UNROLLANDJAM_COUNT_10:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPINNER_11:[0-9]+]]}
 // CHECK-DAG: ![[UNROLLANDJAM_COUNT_10]] = !{!"llvm.loop.unroll_and_jam.count", i32 4}
-// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPINNER_11]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[LOOP_12:[0-9]+]]}
 
-// CHECK-DAG: ![[LOOP_12]] = distinct !{![[LOOP_12:[0-9]+]], ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13:[0-9]+]], ![[UNROLL_COUNT_13:[0-9]+]], ![[UNROLL_FOLLOWUP_14:[0-9]+]]}
+// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPINNER_11]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13:[0-9]+]], ![[UNROLL_COUNT_13:[0-9]+]], ![[UNROLL_FOLLOWUP_14:[0-9]+]]}
 // CHECK-DAG: ![[ISVECTORIZED_13]] = !{!"llvm.loop.isvectorized"}
 // CHECK-DAG: ![[UNROLL_COUNT_13]] = !{!"llvm.loop.unroll.count", i32 4}
-// CHECK-DAG: ![[UNROLL_FOLLOWUP_14]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_15:[0-9]+]]}
 
-// CHECK-DAG: ![[LOOP_15]] = distinct !{![[LOOP_15]], ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13]], ![[UNROLL_DISABLE_16:[0-9]+]], ![[PIPELINE_17:[0-9]+]]}
+// CHECK-DAG: ![[UNROLL_FOLLOWUP_14]] = !{!"llvm.loop.unroll.followup_all", ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13]], ![[UNROLL_DISABLE_16:[0-9]+]], ![[PIPELINE_17:[0-9]+]]}
 // CHECK-DAG: ![[UNROLL_DISABLE_16]] = !{!"llvm.loop.unroll.disable"}
 // CHECK-DAG: ![[PIPELINE_17]] = !{!"llvm.loop.pipeline.initiationinterval", i32 10}
diff --git a/clang/test/CodeGenCXX/pragma-followup_outer.cpp b/clang/test/CodeGenCXX/pragma-followup_outer.cpp
index c3ca2a7b1c288..fdd5a07d00857 100644
--- a/clang/test/CodeGenCXX/pragma-followup_outer.cpp
+++ b/clang/test/CodeGenCXX/pragma-followup_outer.cpp
@@ -20,22 +20,18 @@ extern "C" void followup_outer(int n, int *x) {
 // CHECK-DAG: ![[LOOP_3:[0-9]+]] = distinct !{![[LOOP_3:[0-9]+]], [[MP:![0-9]+]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]}
 // CHECK-DAG: ![[PARALLEL_ACCESSES_4:[0-9]+]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]}
 // CHECK-DAG: ![[DISTRIBUTE_5:[0-9]+]] = !{!"llvm.loop.distribute.enable", i1 true}
-// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_7:[0-9]+]]}
 
-// CHECK-DAG: ![[LOOP_7:[0-9]+]] = distinct !{![[LOOP_7:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[VECTORIZE_8:[0-9]+]], ![[VECTORIZE_FOLLOWUP_9:[0-9]+]]}
+// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]] = !{!"llvm.loop.distribute.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[VECTORIZE_8:[0-9]+]], ![[VECTORIZE_FOLLOWUP_9:[0-9]+]]}
 // CHECK-DAG: ![[VECTORIZE_8:[0-9]+]] = !{!"llvm.loop.vectorize.enable", i1 true}
-// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_9:[0-9]+]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_10:[0-9]+]]}
 
-// CHECK-DAG: ![[LOOP_10:[0-9]+]] = distinct !{![[LOOP_10:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_12:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]]}
+// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_9:[0-9]+]] = !{!"llvm.loop.vectorize.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_12:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]]}
 // CHECK-DAG: ![[ISVECTORIZED_11:[0-9]+]] = !{!"llvm.loop.isvectorized"}
 // CHECK-DAG: ![[UNROLLANDJAM_12:[0-9]+]] =  !{!"llvm.loop.unroll_and_jam.enable"}
-// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.followup_outer", ![[LOOP_14:[0-9]+]]}
 
-// CHECK-DAG: ![[LOOP_14:[0-9]+]] = distinct !{![[LOOP_14:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_COUNT_16:[0-9]+]], ![[UNROLL_FOLLOWUP_17:[0-9]+]]}
+// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.followup_outer", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_COUNT_16:[0-9]+]], ![[UNROLL_FOLLOWUP_17:[0-9]+]]}
 // CHECK-DAG: ![[UNROLLANDJAM_DISABLE_15:[0-9]+]]  = !{!"llvm.loop.unroll_and_jam.disable"}
 // CHECK-DAG: ![[UNROLL_COUNT_16:[0-9]+]] = !{!"llvm.loop.unroll.count", i32 4}
-// CHECK-DAG: ![[UNROLL_FOLLOWUP_17:[0-9]+]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_18:[0-9]+]]}
 
-// CHECK-DAG: ![[LOOP_18:[0-9]+]] = distinct !{![[LOOP_18:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_DISABLE_19:[0-9]+]], ![[INITIATIONINTERVAL_20:[0-9]+]]}
+// CHECK-DAG: ![[UNROLL_FOLLOWUP_17:[0-9]+]] = !{!"llvm.loop.unroll.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_DISABLE_19:[0-9]+]], ![[INITIATIONINTERVAL_20:[0-9]+]]}
 // CHECK-DAG: ![[UNROLL_DISABLE_19:[0-9]+]]  = !{!"llvm.loop.unroll.disable"}
 // CHECK-DAG: ![[INITIATIONINTERVAL_20:[0-9]+]] = !{!"llvm.loop.pipeline.initiationinterval", i32 10}
diff --git a/clang/test/CodeGenCXX/pragma-loop.cpp b/clang/test/CodeGenCXX/pragma-loop.cpp
index 127df41522a57..76bdcc4a5a9c9 100644
--- a/clang/test/CodeGenCXX/pragma-loop.cpp
+++ b/clang/test/CodeGenCXX/pragma-loop.cpp
@@ -215,8 +215,7 @@ void for_test_scalable_1(int *List, int Length) {
 // CHECK: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
 
 // CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], [[MP]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_3:.*]]}
-// CHECK: ![[AFTER_VECTOR_3]] = distinct !{![[AFTER_VECTOR_3]], [[MP]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", [[MP]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
 // CHECK: ![[ISVECTORIZED]] = !{!"llvm.loop.isvectorized"}
 // CHECK: ![[UNROLL_8]] = !{!"llvm.loop.unroll.count", i32 8}
 
@@ -227,36 +226,26 @@ void for_test_scalable_1(int *List, int Length) {
 // CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_1:.*]]}
 // CHECK: ![[WIDTH_1]] = !{!"llvm.loop.vectorize.width", i32 1}
 
-// CHECK: ![[LOOP_6]] = distinct !{![[LOOP_6]], [[MP]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_6:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_6]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_6:.*]]}
-// CHECK: ![[AFTER_VECTOR_6]] = distinct !{![[AFTER_VECTOR_6]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_6]] = distinct !{![[LOOP_6]], [[MP]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_3]]}
 
 // CHECK: ![[LOOP_7]] = distinct !{![[LOOP_7]], [[MP]], ![[WIDTH_5:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
 // CHECK: ![[WIDTH_5]] = !{!"llvm.loop.vectorize.width", i32 5}
 
 // CHECK: ![[LOOP_8]] = distinct !{![[LOOP_8]], [[MP]], ![[WIDTH_5:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
 
-// CHECK: ![[LOOP_9]] = distinct !{![[LOOP_9]], ![[WIDTH_8:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_8:.*]], ![[FOLLOWUP_VECTOR_9:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_9]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_9:.*]]}
-// CHECK: ![[AFTER_VECTOR_9]] = distinct !{![[AFTER_VECTOR_9]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_9]] = distinct !{![[LOOP_9]], ![[WIDTH_8:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_8:.*]], ![[FOLLOWUP_VECTOR_3]]}
 
-// CHECK: ![[LOOP_10]] = distinct !{![[LOOP_10]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_10:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_10]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_10:.*]]}
-// CHECK: ![[AFTER_VECTOR_10]] = distinct !{![[AFTER_VECTOR_10]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_10]] = distinct !{![[LOOP_10]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_3]]}
 
-// CHECK: ![[LOOP_11]] = distinct !{![[LOOP_11]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[FOLLOWUP_VECTOR_11:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_11]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_11:.*]]}
-// CHECK: ![[AFTER_VECTOR_11]] = distinct !{![[AFTER_VECTOR_11]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_11]] = distinct !{![[LOOP_11]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[FOLLOWUP_VECTOR_3]]}
 
 // CHECK: ![[LOOP_12]] = distinct !{![[LOOP_12]], ![[WIDTH_6:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_10:.*]], ![[FOLLOWUP_VECTOR_12:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_12]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_12:.*]]}
-// CHECK: ![[AFTER_VECTOR_12]] = distinct !{![[AFTER_VECTOR_12]], ![[ISVECTORIZED:.*]], ![[UNROLL_24:.*]]}
+// CHECK: ![[FOLLOWUP_VECTOR_12]] = !{!"llvm.loop.vectorize.followup_all", ![[ISVECTORIZED:.*]], ![[UNROLL_24:.*]]}
 // CHECK: ![[UNROLL_24]] = !{!"llvm.loop.unroll.count", i32 24}
 
 // CHECK: ![[LOOP_13]] = distinct !{![[LOOP_13]], ![[WIDTH_8:.*]], ![[INTERLEAVE_16:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_13:.*]]}
 // CHECK: ![[INTERLEAVE_16]] = !{!"llvm.loop.interleave.count", i32 16}
-// CHECK: ![[FOLLOWUP_VECTOR_13]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_13:.*]]}
-// CHECK: ![[AFTER_VECTOR_13]] = distinct !{![[AFTER_VECTOR_13]], ![[ISVECTORIZED:.*]], ![[UNROLL_32:.*]]}
+// CHECK: ![[FOLLOWUP_VECTOR_13]] = !{!"llvm.loop.vectorize.followup_all", ![[ISVECTORIZED:.*]], ![[UNROLL_32:.*]]}
 // CHECK: ![[UNROLL_32]] = !{!"llvm.loop.unroll.count", i32 32}
 
 // CHECK: ![[LOOP_14]] = distinct !{![[LOOP_14]], [[MP]], ![[WIDTH_10:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
index fa5c206547a07..1d633dd6a4e04 100644
--- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
+++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
@@ -12,9 +12,6 @@
 ;   }
 ; }
 ;
-; FIXME: Currently unrolling is not applied. This is because the new Loop ID
-; created after vectorization does not directly contain unroll metadata.
-; Unexpected nests have been created.
 define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-LABEL: define void @f(
 ; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) {
@@ -25,14 +22,47 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
 ; CHECK-NEXT:    br label %[[VECTOR_BODY:.*]]
 ; CHECK:       [[VECTOR_BODY]]:
-; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT:    [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT:    [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
 ; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0
-; CHECK-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4
 ; CHECK-NEXT:    [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]]
-; CHECK-NEXT:    store <4 x float> [[TMP15]], ptr [[TMP2]], align 4
-; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT:    store <4 x float> [[TMP15]], ptr [[TMP14]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]]
+; CHECK-NEXT:    [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]]
+; CHECK-NEXT:    store <4 x float> [[TMP3]], ptr [[TMP2]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8
+; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]]
+; CHECK-NEXT:    [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]]
+; CHECK-NEXT:    store <4 x float> [[TMP5]], ptr [[TMP16]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]]
+; CHECK-NEXT:    [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]]
+; CHECK-NEXT:    store <4 x float> [[TMP7]], ptr [[TMP6]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16
+; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]]
+; CHECK-NEXT:    [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]]
+; CHECK-NEXT:    store <4 x float> [[TMP9]], ptr [[TMP8]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20
+; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]]
+; CHECK-NEXT:    [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]]
+; CHECK-NEXT:    store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24
+; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]]
+; CHECK-NEXT:    [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4
+; CHECK-NEXT:    [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]]
+; CHECK-NEXT:    store <4 x float> [[TMP13]], ptr [[TMP12]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]]
+; CHECK-NEXT:    [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
+; CHECK-NEXT:    [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]]
+; CHECK-NEXT:    store <4 x float> [[TMP18]], ptr [[TMP17]], align 4
+; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32
 ; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
 ; CHECK-NEXT:    br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
 ; CHECK:       [[MIDDLE_BLOCK]]:
@@ -41,14 +71,49 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
 ; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
 ; CHECK-NEXT:    br label %[[FOR_BODY:.*]]
 ; CHECK:       [[FOR_BODY]]:
-; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
 ; CHECK-NEXT:    [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X]], [[LOAD]]
 ; CHECK-NEXT:    store float [[MUL]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT:    [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
-; CHECK-NEXT:    br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT:    [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT:    [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
+; CHECK-NEXT:    [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]]
+; CHECK-NEXT:    store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT:    [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT:    [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT:    [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]]
+; CHECK-NEXT:    store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT:    [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
+; CHECK-NEXT:    [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT:    [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]]
+; CHECK-NEXT:    store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT:    [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
+; CHECK-NEXT:    [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]]
+; CHECK-NEXT:    [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]]
+; CHECK-NEXT:    store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT:    [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
+; CHECK-NEXT:    [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]]
+; CHECK-NEXT:    [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]]
+; CHECK-NEXT:    store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT:    [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
+; CHECK-NEXT:    [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]]
+; CHECK-NEXT:    [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]]
+; CHECK-NEXT:    store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT:    [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
+; CHECK-NEXT:    [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]]
+; CHECK-NEXT:    [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]]
+; CHECK-NEXT:    store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT:    [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT:    [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024
+; CHECK-NEXT:    br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
 ; CHECK:       [[EXIT_LOOPEXIT]]:
 ; CHECK-NEXT:    br label %[[EXIT]]
 ; CHECK:       [[EXIT]]:
@@ -73,15 +138,12 @@ exit:
 
 !0 = distinct !{!0, !1, !2}
 !1 = !{!"llvm.loop.vectorize.enable", i1 true}
-!2 = !{!"llvm.loop.vectorize.followup_all", !3}
-!3 = distinct !{!3, !4, !5}
-!4 = !{!"llvm.loop.isvectorized"}
-!5 = !{!"llvm.loop.unroll.count", i32 8}
+!2 = !{!"llvm.loop.vectorize.followup_all", !3, !4}
+!3 = !{!"llvm.loop.isvectorized"}
+!4 = !{!"llvm.loop.unroll.count", i32 8}
 ;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]}
-; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
-; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"}
-; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8}
-; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
 ;.



More information about the llvm-commits mailing list