[clang] [llvm] [LoopUtils] Fix metadata generated by makeFollowupLoopID (PR #131985)
Ryotaro Kasuga via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 26 01:35:23 PDT 2025
https://github.com/kasuga-fj updated https://github.com/llvm/llvm-project/pull/131985
>From 889f40c5570af8a02e301c2bf3c6382f69210140 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Mon, 17 Mar 2025 11:24:47 +0000
Subject: [PATCH 1/3] [LoopUtils] Fix metadata generated by makeFollowupLoopID
When multiple pragmas for loop transformations are specified, such as:
```c
#pragma clang loop vectorize(enable)
#pragma clang loop unroll_count(8)
for (...) {
}
```
The generated metadata would look like this:
```
!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
!2 = !{!"llvm.loop.vectorize.followup_all", !3}
!3 = distinct !{!3, !4, !5}
!4 = !{!"llvm.loop.isvectorized"}
!5 = !{!"llvm.loop.unroll.count", i32 8}
```
For a loop with `!0` as its LoopID, the new LoopID after vectorization
should look like the following, so that later passes know this loop is
already vectorized and should be unrolled with the specified count:
```
!6 = distinct !{!6, !4, !5}
```
However, the current implementation creates a new LoopID like:
```
!7 = distinct !{!7, !3}
```
Therefore, subsequent passes such as LoopUnroll fail to recognize the
attributes of this loop correctly.
This patch fixes `makeFollowupLoopID`, which creates a new LoopID after
each transformation. If the follow-up metadata (`!3` in the above case)
is a LoopID, the new LoopID will contain its operands (`!4` and `!5`)
instead of the metadata itself.
---
llvm/lib/Transforms/Utils/LoopUtils.cpp | 29 +++++
.../LoopVectorize/make-followup-loop-id.ll | 102 ++++++++++++++----
2 files changed, 111 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 84c08556f8a25..4a6105add953f 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -317,6 +317,35 @@ std::optional<MDNode *> llvm::makeFollowupLoopID(
HasAnyFollowup = true;
for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
+ // The followup metadata typically forms as follows:
+ //
+ // !0 = distinct !{!0, !1, !2}
+ // !1 = !{!"llvm.loop.distribute.enable", i1 true}
+ // !2 = !{!"llvm.loop.distribute.followup_all", !3}
+ // !3 = distinct !{!3, !4}
+ // !4 = !{!"llvm.loop.vectorize.enable", i1 true}
+ //
+ // If we push Option (!3 in this case) in MDs, the new metadata looks
+ // something like:
+ //
+ // !5 = distinct !{!5, !3}
+ //
+ // This doesn't contain !4, so the vectorization pass doesn't recognize
+ // this loop as vectorization enabled. To make the new metadata contain !4
+ // instead of !3, traverse all of Option's operands and push them into
+ // MDs if Option seems to be a LoopID.
+ if (auto *MDN = dyn_cast<MDNode>(Option)) {
+ // TODO: Is there a proper way to detect LoopID?
+ if (MDN->getNumOperands() > 1 && MDN->getOperand(0) == MDN) {
+ for (const MDOperand &NestedOption : drop_begin(MDN->operands())) {
+ MDs.push_back(NestedOption.get());
+ Changed = true;
+ }
+ continue;
+ }
+ }
+
+ // If Option does't seem to be a LoopID, push it as it is.
MDs.push_back(Option.get());
Changed = true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
index fa5c206547a07..41f508e0a7641 100644
--- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
+++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
@@ -11,10 +11,6 @@
; a[i] *= x;
; }
; }
-;
-; FIXME: Currently unrolling is not applied. This is because the new Loop ID
-; created after vectorization does not directly contain unroll metadata.
-; Unexpected nests have been created.
define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-LABEL: define void @f(
; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) {
@@ -25,14 +21,47 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]]
-; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP2]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP14]], align 4
+; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]]
+; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]]
+; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]]
+; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP16]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]]
+; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]]
+; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]]
+; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]]
+; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP8]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]]
+; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]]
+; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]]
+; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]]
+; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP12]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]]
+; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]]
+; CHECK-NEXT: store <4 x float> [[TMP18]], ptr [[TMP17]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -41,14 +70,49 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]]
; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
-; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]]
+; CHECK-NEXT: store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT: [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]]
+; CHECK-NEXT: store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT: [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]]
+; CHECK-NEXT: store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]]
+; CHECK-NEXT: [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]]
+; CHECK-NEXT: store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]]
+; CHECK-NEXT: [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]]
+; CHECK-NEXT: store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]]
+; CHECK-NEXT: [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]]
+; CHECK-NEXT: store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]]
+; CHECK-NEXT: [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]]
+; CHECK-NEXT: store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT: [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024
+; CHECK-NEXT: br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
@@ -78,10 +142,8 @@ exit:
!4 = !{!"llvm.loop.isvectorized"}
!5 = !{!"llvm.loop.unroll.count", i32 8}
;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]}
-; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
-; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"}
-; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8}
-; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
;.
>From b1d69e95eb7a75d9809cf77c11367f7bda66ff34 Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Tue, 25 Mar 2025 06:43:31 +0000
Subject: [PATCH 2/3] Revert "[LoopUtils] Fix metadata generated by
makeFollowupLoopID"
This reverts commit 889f40c5570af8a02e301c2bf3c6382f69210140.
---
llvm/lib/Transforms/Utils/LoopUtils.cpp | 29 -----
.../LoopVectorize/make-followup-loop-id.ll | 102 ++++--------------
2 files changed, 20 insertions(+), 111 deletions(-)
diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 4a6105add953f..84c08556f8a25 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -317,35 +317,6 @@ std::optional<MDNode *> llvm::makeFollowupLoopID(
HasAnyFollowup = true;
for (const MDOperand &Option : drop_begin(FollowupNode->operands())) {
- // The followup metadata typically forms as follows:
- //
- // !0 = distinct !{!0, !1, !2}
- // !1 = !{!"llvm.loop.distribute.enable", i1 true}
- // !2 = !{!"llvm.loop.distribute.followup_all", !3}
- // !3 = distinct !{!3, !4}
- // !4 = !{!"llvm.loop.vectorize.enable", i1 true}
- //
- // If we push Option (!3 in this case) in MDs, the new metadata looks
- // something like:
- //
- // !5 = distinct !{!5, !3}
- //
- // This doesn't contain !4, so the vectorization pass doesn't recognize
- // this loop as vectorization enabled. To make the new metadata contain !4
- // instead of !3, traverse all of Option's operands and push them into
- // MDs if Option seems to be a LoopID.
- if (auto *MDN = dyn_cast<MDNode>(Option)) {
- // TODO: Is there a proper way to detect LoopID?
- if (MDN->getNumOperands() > 1 && MDN->getOperand(0) == MDN) {
- for (const MDOperand &NestedOption : drop_begin(MDN->operands())) {
- MDs.push_back(NestedOption.get());
- Changed = true;
- }
- continue;
- }
- }
-
- // If Option does't seem to be a LoopID, push it as it is.
MDs.push_back(Option.get());
Changed = true;
}
diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
index 41f508e0a7641..fa5c206547a07 100644
--- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
+++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
@@ -11,6 +11,10 @@
; a[i] *= x;
; }
; }
+;
+; FIXME: Currently unrolling is not applied. This is because the new Loop ID
+; created after vectorization does not directly contain unroll metadata.
+; Unexpected nests have been created.
define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-LABEL: define void @f(
; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) {
@@ -21,47 +25,14 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]]
-; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]]
-; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP14]], align 4
-; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]]
-; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
-; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]]
-; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4
-; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8
-; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]]
-; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
-; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]]
-; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP16]], align 4
-; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12
-; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]]
-; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
-; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]]
-; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP6]], align 4
-; CHECK-NEXT: [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16
-; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]]
-; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
-; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]]
-; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP8]], align 4
-; CHECK-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20
-; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]]
-; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
-; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]]
-; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
-; CHECK-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24
-; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]]
-; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4
-; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]]
-; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP12]], align 4
-; CHECK-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28
-; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]]
-; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
-; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]]
-; CHECK-NEXT: store <4 x float> [[TMP18]], ptr [[TMP17]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32
+; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -70,49 +41,14 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]]
; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
-; CHECK-NEXT: [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]]
-; CHECK-NEXT: store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4
-; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
-; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]]
-; CHECK-NEXT: [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]]
-; CHECK-NEXT: store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4
-; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
-; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]]
-; CHECK-NEXT: [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]]
-; CHECK-NEXT: store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4
-; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
-; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]]
-; CHECK-NEXT: [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]]
-; CHECK-NEXT: store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4
-; CHECK-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
-; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]]
-; CHECK-NEXT: [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]]
-; CHECK-NEXT: store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4
-; CHECK-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
-; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]]
-; CHECK-NEXT: [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]]
-; CHECK-NEXT: store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4
-; CHECK-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
-; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]]
-; CHECK-NEXT: [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]]
-; CHECK-NEXT: store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4
-; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
-; CHECK-NEXT: [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024
-; CHECK-NEXT: br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
+; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
@@ -142,8 +78,10 @@ exit:
!4 = !{!"llvm.loop.isvectorized"}
!5 = !{!"llvm.loop.unroll.count", i32 8}
;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
-; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"}
-; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
-; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]}
+; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
+; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"}
+; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8}
+; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
;.
>From ce8febb0befe41694b9d83c14dcfb831a82489ff Mon Sep 17 00:00:00 2001
From: Ryotaro Kasuga <kasuga.ryotaro at fujitsu.com>
Date: Wed, 26 Mar 2025 07:29:18 +0000
Subject: [PATCH 3/3] [clang][CodeGen] Generate follow-up metadata for loops in
correct format
When loop transformation pragmas are specified, follow-up metadata for
loops is generated after each transformation. On the LLVM side,
follow-up metadata is expected to be a list of properties, such as the
following:
```
!followup = !{!"llvm.loop.vectorize.followup_all", !mp, !isvectorized}
!mp = !{!"llvm.loop.mustprogress"}
!isvectorized = !{!"llvm.loop.isvectorized"}
```
However, on the clang side, the generated metadata contains an MDNode
that has those properties, as shown below:
```
!followup = !{!"llvm.loop.vectorize.followup_all", !loop_id}
!loop_id = distinct !{!loop_id, !mp, !isvectorized}
!mp = !{!"llvm.loop.mustprogress"}
!isvectorized = !{!"llvm.loop.isvectorized"}
```
According to the LangRef, the LLVM side is correct. (ref:
https://llvm.org/docs/TransformMetadata.html#transformation-metadata-structure).
Due to this inconsistency, follow-up metadata was not interpreted
correctly, e.g., only one transformation is applied when multiple
pragmas are used.
This patch fixes the clang side to emit follow-up metadata in the correct format.
---
clang/lib/CodeGen/CGLoopInfo.cpp | 133 ++++++++----------
clang/lib/CodeGen/CGLoopInfo.h | 43 +++---
.../test/CodeGenCXX/pragma-followup_inner.cpp | 9 +-
.../test/CodeGenCXX/pragma-followup_outer.cpp | 12 +-
clang/test/CodeGenCXX/pragma-loop.cpp | 25 +---
.../LoopVectorize/make-followup-loop-id.ll | 108 +++++++++++---
6 files changed, 180 insertions(+), 150 deletions(-)
diff --git a/clang/lib/CodeGen/CGLoopInfo.cpp b/clang/lib/CodeGen/CGLoopInfo.cpp
index 448571221ef81..2b7d7881ab990 100644
--- a/clang/lib/CodeGen/CGLoopInfo.cpp
+++ b/clang/lib/CodeGen/CGLoopInfo.cpp
@@ -22,20 +22,20 @@ using namespace clang::CodeGen;
using namespace llvm;
MDNode *
-LoopInfo::createLoopPropertiesMetadata(ArrayRef<Metadata *> LoopProperties) {
+LoopInfo::createFollowupMetadata(const char *FollowupName,
+ ArrayRef<llvm::Metadata *> LoopProperties) {
LLVMContext &Ctx = Header->getContext();
- SmallVector<Metadata *, 4> NewLoopProperties;
- NewLoopProperties.push_back(nullptr);
- NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end());
- MDNode *LoopID = MDNode::getDistinct(Ctx, NewLoopProperties);
- LoopID->replaceOperandWith(0, LoopID);
- return LoopID;
+ SmallVector<Metadata *, 4> Args;
+ Args.push_back(MDString::get(Ctx, FollowupName));
+ Args.append(LoopProperties.begin(), LoopProperties.end());
+ return MDNode::get(Ctx, Args);
}
-MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
- ArrayRef<Metadata *> LoopProperties,
- bool &HasUserTransforms) {
+SmallVector<Metadata *, 4>
+LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
+ ArrayRef<Metadata *> LoopProperties,
+ bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext();
std::optional<bool> Enabled;
@@ -44,23 +44,19 @@ MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
else if (Attrs.PipelineInitiationInterval != 0)
Enabled = true;
+ SmallVector<Metadata *, 4> Args;
+ Args.append(LoopProperties.begin(), LoopProperties.end());
+
if (Enabled != true) {
- SmallVector<Metadata *, 4> NewLoopProperties;
if (Enabled == false) {
- NewLoopProperties.append(LoopProperties.begin(), LoopProperties.end());
- NewLoopProperties.push_back(
+ Args.push_back(
MDNode::get(Ctx, {MDString::get(Ctx, "llvm.loop.pipeline.disable"),
ConstantAsMetadata::get(ConstantInt::get(
llvm::Type::getInt1Ty(Ctx), 1))}));
- LoopProperties = NewLoopProperties;
}
- return createLoopPropertiesMetadata(LoopProperties);
+ return Args;
}
- SmallVector<Metadata *, 4> Args;
- Args.push_back(nullptr);
- Args.append(LoopProperties.begin(), LoopProperties.end());
-
if (Attrs.PipelineInitiationInterval > 0) {
Metadata *Vals[] = {
MDString::get(Ctx, "llvm.loop.pipeline.initiationinterval"),
@@ -71,13 +67,11 @@ MDNode *LoopInfo::createPipeliningMetadata(const LoopAttributes &Attrs,
// No follow-up: This is the last transformation.
- MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
- LoopID->replaceOperandWith(0, LoopID);
HasUserTransforms = true;
- return LoopID;
+ return Args;
}
-MDNode *
+SmallVector<Metadata *, 4>
LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs,
ArrayRef<Metadata *> LoopProperties,
bool &HasUserTransforms) {
@@ -108,11 +102,10 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs,
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.disable")));
bool FollowupHasTransforms = false;
- MDNode *Followup = createPipeliningMetadata(Attrs, FollowupLoopProperties,
- FollowupHasTransforms);
+ SmallVector<Metadata *, 4> Followup = createPipeliningMetadata(
+ Attrs, FollowupLoopProperties, FollowupHasTransforms);
SmallVector<Metadata *, 4> Args;
- Args.push_back(nullptr);
Args.append(LoopProperties.begin(), LoopProperties.end());
// Setting unroll.count
@@ -130,16 +123,14 @@ LoopInfo::createPartialUnrollMetadata(const LoopAttributes &Attrs,
}
if (FollowupHasTransforms)
- Args.push_back(MDNode::get(
- Ctx, {MDString::get(Ctx, "llvm.loop.unroll.followup_all"), Followup}));
+ Args.push_back(
+ createFollowupMetadata("llvm.loop.unroll.followup_all", Followup));
- MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
- LoopID->replaceOperandWith(0, LoopID);
HasUserTransforms = true;
- return LoopID;
+ return Args;
}
-MDNode *
+SmallVector<Metadata *, 4>
LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs,
ArrayRef<Metadata *> LoopProperties,
bool &HasUserTransforms) {
@@ -170,11 +161,10 @@ LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs,
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll_and_jam.disable")));
bool FollowupHasTransforms = false;
- MDNode *Followup = createPartialUnrollMetadata(Attrs, FollowupLoopProperties,
- FollowupHasTransforms);
+ SmallVector<Metadata *, 4> Followup = createPartialUnrollMetadata(
+ Attrs, FollowupLoopProperties, FollowupHasTransforms);
SmallVector<Metadata *, 4> Args;
- Args.push_back(nullptr);
Args.append(LoopProperties.begin(), LoopProperties.end());
// Setting unroll_and_jam.count
@@ -192,22 +182,18 @@ LoopInfo::createUnrollAndJamMetadata(const LoopAttributes &Attrs,
}
if (FollowupHasTransforms)
- Args.push_back(MDNode::get(
- Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_outer"),
- Followup}));
+ Args.push_back(createFollowupMetadata(
+ "llvm.loop.unroll_and_jam.followup_outer", Followup));
- if (UnrollAndJamInnerFollowup)
- Args.push_back(MDNode::get(
- Ctx, {MDString::get(Ctx, "llvm.loop.unroll_and_jam.followup_inner"),
- UnrollAndJamInnerFollowup}));
+ if (UnrollAndJamInnerFollowup.has_value())
+ Args.push_back(createFollowupMetadata(
+ "llvm.loop.unroll_and_jam.followup_inner", *UnrollAndJamInnerFollowup));
- MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
- LoopID->replaceOperandWith(0, LoopID);
HasUserTransforms = true;
- return LoopID;
+ return Args;
}
-MDNode *
+SmallVector<Metadata *, 4>
LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
ArrayRef<Metadata *> LoopProperties,
bool &HasUserTransforms) {
@@ -244,11 +230,10 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized")));
bool FollowupHasTransforms = false;
- MDNode *Followup = createUnrollAndJamMetadata(Attrs, FollowupLoopProperties,
- FollowupHasTransforms);
+ SmallVector<Metadata *, 4> Followup = createUnrollAndJamMetadata(
+ Attrs, FollowupLoopProperties, FollowupHasTransforms);
SmallVector<Metadata *, 4> Args;
- Args.push_back(nullptr);
Args.append(LoopProperties.begin(), LoopProperties.end());
// Setting vectorize.predicate when it has been specified and vectorization
@@ -315,17 +300,14 @@ LoopInfo::createLoopVectorizeMetadata(const LoopAttributes &Attrs,
}
if (FollowupHasTransforms)
- Args.push_back(MDNode::get(
- Ctx,
- {MDString::get(Ctx, "llvm.loop.vectorize.followup_all"), Followup}));
+ Args.push_back(
+ createFollowupMetadata("llvm.loop.vectorize.followup_all", Followup));
- MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
- LoopID->replaceOperandWith(0, LoopID);
HasUserTransforms = true;
- return LoopID;
+ return Args;
}
-MDNode *
+SmallVector<Metadata *, 4>
LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs,
ArrayRef<Metadata *> LoopProperties,
bool &HasUserTransforms) {
@@ -352,11 +334,10 @@ LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs,
}
bool FollowupHasTransforms = false;
- MDNode *Followup =
+ SmallVector<Metadata *, 4> Followup =
createLoopVectorizeMetadata(Attrs, LoopProperties, FollowupHasTransforms);
SmallVector<Metadata *, 4> Args;
- Args.push_back(nullptr);
Args.append(LoopProperties.begin(), LoopProperties.end());
Metadata *Vals[] = {MDString::get(Ctx, "llvm.loop.distribute.enable"),
@@ -366,19 +347,17 @@ LoopInfo::createLoopDistributeMetadata(const LoopAttributes &Attrs,
Args.push_back(MDNode::get(Ctx, Vals));
if (FollowupHasTransforms)
- Args.push_back(MDNode::get(
- Ctx,
- {MDString::get(Ctx, "llvm.loop.distribute.followup_all"), Followup}));
+ Args.push_back(
+ createFollowupMetadata("llvm.loop.distribute.followup_all", Followup));
- MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
- LoopID->replaceOperandWith(0, LoopID);
HasUserTransforms = true;
- return LoopID;
+ return Args;
}
-MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs,
- ArrayRef<Metadata *> LoopProperties,
- bool &HasUserTransforms) {
+SmallVector<Metadata *, 4>
+LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs,
+ ArrayRef<Metadata *> LoopProperties,
+ bool &HasUserTransforms) {
LLVMContext &Ctx = Header->getContext();
std::optional<bool> Enabled;
@@ -400,20 +379,17 @@ MDNode *LoopInfo::createFullUnrollMetadata(const LoopAttributes &Attrs,
}
SmallVector<Metadata *, 4> Args;
- Args.push_back(nullptr);
Args.append(LoopProperties.begin(), LoopProperties.end());
Args.push_back(MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.unroll.full")));
// No follow-up: there is no loop after full unrolling.
// TODO: Warn if there are transformations after full unrolling.
- MDNode *LoopID = MDNode::getDistinct(Ctx, Args);
- LoopID->replaceOperandWith(0, LoopID);
HasUserTransforms = true;
- return LoopID;
+ return Args;
}
-MDNode *LoopInfo::createMetadata(
+SmallVector<Metadata *, 4> LoopInfo::createMetadata(
const LoopAttributes &Attrs,
llvm::ArrayRef<llvm::Metadata *> AdditionalLoopProperties,
bool &HasUserTransforms) {
@@ -579,8 +555,8 @@ void LoopInfo::finish() {
MDNode::get(Ctx, MDString::get(Ctx, "llvm.loop.isvectorized")));
bool InnerFollowupHasTransform = false;
- MDNode *InnerFollowup = createMetadata(AfterJam, BeforeLoopProperties,
- InnerFollowupHasTransform);
+ SmallVector<Metadata *, 4> InnerFollowup = createMetadata(
+ AfterJam, BeforeLoopProperties, InnerFollowupHasTransform);
if (InnerFollowupHasTransform)
Parent->UnrollAndJamInnerFollowup = InnerFollowup;
}
@@ -589,7 +565,14 @@ void LoopInfo::finish() {
}
bool HasUserTransforms = false;
- LoopID = createMetadata(CurLoopAttr, {}, HasUserTransforms);
+ SmallVector<Metadata *, 4> Properties =
+ createMetadata(CurLoopAttr, {}, HasUserTransforms);
+ SmallVector<Metadata *, 4> Args;
+ Args.push_back(nullptr);
+ Args.append(Properties.begin(), Properties.end());
+ LoopID = MDNode::getDistinct(Ctx, Args);
+ LoopID->replaceOperandWith(0, LoopID);
+
TempLoopID->replaceAllUsesWith(LoopID);
}
diff --git a/clang/lib/CodeGen/CGLoopInfo.h b/clang/lib/CodeGen/CGLoopInfo.h
index 0fe33b2891306..3c57124f4137c 100644
--- a/clang/lib/CodeGen/CGLoopInfo.h
+++ b/clang/lib/CodeGen/CGLoopInfo.h
@@ -132,17 +132,19 @@ class LoopInfo {
/// If this loop has unroll-and-jam metadata, this can be set by the inner
/// loop's LoopInfo to set the llvm.loop.unroll_and_jam.followup_inner
/// metadata.
- llvm::MDNode *UnrollAndJamInnerFollowup = nullptr;
+ std::optional<llvm::SmallVector<llvm::Metadata *, 4>>
+ UnrollAndJamInnerFollowup;
- /// Create a LoopID without any transformations.
+ /// Create a followup MDNode that has @p LoopProperties as its attributes.
llvm::MDNode *
- createLoopPropertiesMetadata(llvm::ArrayRef<llvm::Metadata *> LoopProperties);
+ createFollowupMetadata(const char *FollowupName,
+ llvm::ArrayRef<llvm::Metadata *> LoopProperties);
- /// Create a LoopID for transformations.
+ /// Create a metadata list for transformations.
///
/// The methods call each other in case multiple transformations are applied
- /// to a loop. The transformation first to be applied will use LoopID of the
- /// next transformation in its followup attribute.
+ /// to a loop. The transformation first to be applied will use metadata list
+ /// of the next transformation in its followup attribute.
///
/// @param Attrs The loop's transformations.
/// @param LoopProperties Non-transformation properties such as debug
@@ -152,36 +154,37 @@ class LoopInfo {
/// @param HasUserTransforms [out] Set to true if the returned MDNode encodes
/// at least one transformation.
///
- /// @return A LoopID (metadata node) that can be used for the llvm.loop
- /// annotation or followup-attribute.
+ /// @return A metadata list that can be used for the llvm.loop annotation or
+ /// followup-attribute.
/// @{
- llvm::MDNode *
+ llvm::SmallVector<llvm::Metadata *, 4>
createPipeliningMetadata(const LoopAttributes &Attrs,
llvm::ArrayRef<llvm::Metadata *> LoopProperties,
bool &HasUserTransforms);
- llvm::MDNode *
+ llvm::SmallVector<llvm::Metadata *, 4>
createPartialUnrollMetadata(const LoopAttributes &Attrs,
llvm::ArrayRef<llvm::Metadata *> LoopProperties,
bool &HasUserTransforms);
- llvm::MDNode *
+ llvm::SmallVector<llvm::Metadata *, 4>
createUnrollAndJamMetadata(const LoopAttributes &Attrs,
llvm::ArrayRef<llvm::Metadata *> LoopProperties,
bool &HasUserTransforms);
- llvm::MDNode *
+ llvm::SmallVector<llvm::Metadata *, 4>
createLoopVectorizeMetadata(const LoopAttributes &Attrs,
llvm::ArrayRef<llvm::Metadata *> LoopProperties,
bool &HasUserTransforms);
- llvm::MDNode *
+ llvm::SmallVector<llvm::Metadata *, 4>
createLoopDistributeMetadata(const LoopAttributes &Attrs,
llvm::ArrayRef<llvm::Metadata *> LoopProperties,
bool &HasUserTransforms);
- llvm::MDNode *
+ llvm::SmallVector<llvm::Metadata *, 4>
createFullUnrollMetadata(const LoopAttributes &Attrs,
llvm::ArrayRef<llvm::Metadata *> LoopProperties,
bool &HasUserTransforms);
+
/// @}
- /// Create a LoopID for this loop, including transformation-unspecific
+ /// Create a metadata list for this loop, including transformation-unspecific
/// metadata such as debug location.
///
/// @param Attrs This loop's attributes and transformations.
@@ -191,11 +194,11 @@ class LoopInfo {
/// @param HasUserTransforms [out] Set to true if the returned MDNode encodes
/// at least one transformation.
///
- /// @return A LoopID (metadata node) that can be used for the llvm.loop
- /// annotation.
- llvm::MDNode *createMetadata(const LoopAttributes &Attrs,
- llvm::ArrayRef<llvm::Metadata *> LoopProperties,
- bool &HasUserTransforms);
+ /// @return A metadata list that can be used for the llvm.loop annotation.
+ llvm::SmallVector<llvm::Metadata *, 4>
+ createMetadata(const LoopAttributes &Attrs,
+ llvm::ArrayRef<llvm::Metadata *> LoopProperties,
+ bool &HasUserTransforms);
};
/// A stack of loop information corresponding to loop nesting levels.
diff --git a/clang/test/CodeGenCXX/pragma-followup_inner.cpp b/clang/test/CodeGenCXX/pragma-followup_inner.cpp
index bdcf8a9cbcc07..04f5656dcd638 100644
--- a/clang/test/CodeGenCXX/pragma-followup_inner.cpp
+++ b/clang/test/CodeGenCXX/pragma-followup_inner.cpp
@@ -23,20 +23,17 @@ extern "C" void followup_inner(int n, int *x) {
// CHECK-DAG: ![[INNERLOOP_3]] = distinct !{![[INNERLOOP_3]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]}
// CHECK-DAG: ![[PARALLEL_ACCESSES_4]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]}
// CHECK-DAG: ![[DISTRIBUTE_5]] = !{!"llvm.loop.distribute.enable", i1 true}
-// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_7:[0-9]+]]}
-// CHECK-DAG: ![[LOOP_7]] = distinct !{![[LOOP_7]], ![[PARALLEL_ACCESSES_4]], ![[VECTORIZE_8:[0-9]+]]}
+// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6]] = !{!"llvm.loop.distribute.followup_all", ![[PARALLEL_ACCESSES_4]], ![[VECTORIZE_8:[0-9]+]]}
// CHECK-DAG: ![[VECTORIZE_8]] = !{!"llvm.loop.vectorize.enable", i1 true}
// CHECK-DAG: ![[OUTERLOOP_9]] = distinct !{![[OUTERLOOP_9]], [[MP:![0-9]+]], ![[UNROLLANDJAM_COUNT_10:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPINNER_11:[0-9]+]]}
// CHECK-DAG: ![[UNROLLANDJAM_COUNT_10]] = !{!"llvm.loop.unroll_and_jam.count", i32 4}
-// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPINNER_11]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[LOOP_12:[0-9]+]]}
-// CHECK-DAG: ![[LOOP_12]] = distinct !{![[LOOP_12:[0-9]+]], ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13:[0-9]+]], ![[UNROLL_COUNT_13:[0-9]+]], ![[UNROLL_FOLLOWUP_14:[0-9]+]]}
+// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPINNER_11]] = !{!"llvm.loop.unroll_and_jam.followup_inner", ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13:[0-9]+]], ![[UNROLL_COUNT_13:[0-9]+]], ![[UNROLL_FOLLOWUP_14:[0-9]+]]}
// CHECK-DAG: ![[ISVECTORIZED_13]] = !{!"llvm.loop.isvectorized"}
// CHECK-DAG: ![[UNROLL_COUNT_13]] = !{!"llvm.loop.unroll.count", i32 4}
-// CHECK-DAG: ![[UNROLL_FOLLOWUP_14]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_15:[0-9]+]]}
-// CHECK-DAG: ![[LOOP_15]] = distinct !{![[LOOP_15]], ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13]], ![[UNROLL_DISABLE_16:[0-9]+]], ![[PIPELINE_17:[0-9]+]]}
+// CHECK-DAG: ![[UNROLL_FOLLOWUP_14]] = !{!"llvm.loop.unroll.followup_all", ![[PARALLEL_ACCESSES_4]], ![[ISVECTORIZED_13]], ![[UNROLL_DISABLE_16:[0-9]+]], ![[PIPELINE_17:[0-9]+]]}
// CHECK-DAG: ![[UNROLL_DISABLE_16]] = !{!"llvm.loop.unroll.disable"}
// CHECK-DAG: ![[PIPELINE_17]] = !{!"llvm.loop.pipeline.initiationinterval", i32 10}
diff --git a/clang/test/CodeGenCXX/pragma-followup_outer.cpp b/clang/test/CodeGenCXX/pragma-followup_outer.cpp
index c3ca2a7b1c288..fdd5a07d00857 100644
--- a/clang/test/CodeGenCXX/pragma-followup_outer.cpp
+++ b/clang/test/CodeGenCXX/pragma-followup_outer.cpp
@@ -20,22 +20,18 @@ extern "C" void followup_outer(int n, int *x) {
// CHECK-DAG: ![[LOOP_3:[0-9]+]] = distinct !{![[LOOP_3:[0-9]+]], [[MP:![0-9]+]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[DISTRIBUTE_5:[0-9]+]], ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]]}
// CHECK-DAG: ![[PARALLEL_ACCESSES_4:[0-9]+]] = !{!"llvm.loop.parallel_accesses", ![[ACCESSGROUP_2]]}
// CHECK-DAG: ![[DISTRIBUTE_5:[0-9]+]] = !{!"llvm.loop.distribute.enable", i1 true}
-// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]] = !{!"llvm.loop.distribute.followup_all", ![[LOOP_7:[0-9]+]]}
-// CHECK-DAG: ![[LOOP_7:[0-9]+]] = distinct !{![[LOOP_7:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[VECTORIZE_8:[0-9]+]], ![[VECTORIZE_FOLLOWUP_9:[0-9]+]]}
+// CHECK-DAG: ![[DISTRIBUTE_FOLLOWUP_6:[0-9]+]] = !{!"llvm.loop.distribute.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[VECTORIZE_8:[0-9]+]], ![[VECTORIZE_FOLLOWUP_9:[0-9]+]]}
// CHECK-DAG: ![[VECTORIZE_8:[0-9]+]] = !{!"llvm.loop.vectorize.enable", i1 true}
-// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_9:[0-9]+]] = !{!"llvm.loop.vectorize.followup_all", ![[LOOP_10:[0-9]+]]}
-// CHECK-DAG: ![[LOOP_10:[0-9]+]] = distinct !{![[LOOP_10:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_12:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]]}
+// CHECK-DAG: ![[VECTORIZE_FOLLOWUP_9:[0-9]+]] = !{!"llvm.loop.vectorize.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_12:[0-9]+]], ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]]}
// CHECK-DAG: ![[ISVECTORIZED_11:[0-9]+]] = !{!"llvm.loop.isvectorized"}
// CHECK-DAG: ![[UNROLLANDJAM_12:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.enable"}
-// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.followup_outer", ![[LOOP_14:[0-9]+]]}
-// CHECK-DAG: ![[LOOP_14:[0-9]+]] = distinct !{![[LOOP_14:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_COUNT_16:[0-9]+]], ![[UNROLL_FOLLOWUP_17:[0-9]+]]}
+// CHECK-DAG: ![[UNROLLANDJAM_FOLLOWUPOUTER_13:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.followup_outer", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_COUNT_16:[0-9]+]], ![[UNROLL_FOLLOWUP_17:[0-9]+]]}
// CHECK-DAG: ![[UNROLLANDJAM_DISABLE_15:[0-9]+]] = !{!"llvm.loop.unroll_and_jam.disable"}
// CHECK-DAG: ![[UNROLL_COUNT_16:[0-9]+]] = !{!"llvm.loop.unroll.count", i32 4}
-// CHECK-DAG: ![[UNROLL_FOLLOWUP_17:[0-9]+]] = !{!"llvm.loop.unroll.followup_all", ![[LOOP_18:[0-9]+]]}
-// CHECK-DAG: ![[LOOP_18:[0-9]+]] = distinct !{![[LOOP_18:[0-9]+]], [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_DISABLE_19:[0-9]+]], ![[INITIATIONINTERVAL_20:[0-9]+]]}
+// CHECK-DAG: ![[UNROLL_FOLLOWUP_17:[0-9]+]] = !{!"llvm.loop.unroll.followup_all", [[MP]], ![[PARALLEL_ACCESSES_4:[0-9]+]], ![[ISVECTORIZED_11:[0-9]+]], ![[UNROLLANDJAM_DISABLE_15:[0-9]+]], ![[UNROLL_DISABLE_19:[0-9]+]], ![[INITIATIONINTERVAL_20:[0-9]+]]}
// CHECK-DAG: ![[UNROLL_DISABLE_19:[0-9]+]] = !{!"llvm.loop.unroll.disable"}
// CHECK-DAG: ![[INITIATIONINTERVAL_20:[0-9]+]] = !{!"llvm.loop.pipeline.initiationinterval", i32 10}
diff --git a/clang/test/CodeGenCXX/pragma-loop.cpp b/clang/test/CodeGenCXX/pragma-loop.cpp
index 127df41522a57..76bdcc4a5a9c9 100644
--- a/clang/test/CodeGenCXX/pragma-loop.cpp
+++ b/clang/test/CodeGenCXX/pragma-loop.cpp
@@ -215,8 +215,7 @@ void for_test_scalable_1(int *List, int Length) {
// CHECK: ![[VECTORIZE_ENABLE]] = !{!"llvm.loop.vectorize.enable", i1 true}
// CHECK: ![[LOOP_3]] = distinct !{![[LOOP_3]], [[MP]], ![[INTERLEAVE_4:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_3:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_3:.*]]}
-// CHECK: ![[AFTER_VECTOR_3]] = distinct !{![[AFTER_VECTOR_3]], [[MP]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[FOLLOWUP_VECTOR_3]] = !{!"llvm.loop.vectorize.followup_all", [[MP]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
// CHECK: ![[ISVECTORIZED]] = !{!"llvm.loop.isvectorized"}
// CHECK: ![[UNROLL_8]] = !{!"llvm.loop.unroll.count", i32 8}
@@ -227,36 +226,26 @@ void for_test_scalable_1(int *List, int Length) {
// CHECK: ![[LOOP_5]] = distinct !{![[LOOP_5]], ![[UNROLL_DISABLE:.*]], ![[DISTRIBUTE_DISABLE:.*]], ![[WIDTH_1:.*]]}
// CHECK: ![[WIDTH_1]] = !{!"llvm.loop.vectorize.width", i32 1}
-// CHECK: ![[LOOP_6]] = distinct !{![[LOOP_6]], [[MP]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_6:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_6]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_6:.*]]}
-// CHECK: ![[AFTER_VECTOR_6]] = distinct !{![[AFTER_VECTOR_6]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_6]] = distinct !{![[LOOP_6]], [[MP]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_3]]}
// CHECK: ![[LOOP_7]] = distinct !{![[LOOP_7]], [[MP]], ![[WIDTH_5:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
// CHECK: ![[WIDTH_5]] = !{!"llvm.loop.vectorize.width", i32 5}
// CHECK: ![[LOOP_8]] = distinct !{![[LOOP_8]], [[MP]], ![[WIDTH_5:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
-// CHECK: ![[LOOP_9]] = distinct !{![[LOOP_9]], ![[WIDTH_8:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_8:.*]], ![[FOLLOWUP_VECTOR_9:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_9]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_9:.*]]}
-// CHECK: ![[AFTER_VECTOR_9]] = distinct !{![[AFTER_VECTOR_9]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_9]] = distinct !{![[LOOP_9]], ![[WIDTH_8:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_8:.*]], ![[FOLLOWUP_VECTOR_3]]}
-// CHECK: ![[LOOP_10]] = distinct !{![[LOOP_10]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_10:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_10]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_10:.*]]}
-// CHECK: ![[AFTER_VECTOR_10]] = distinct !{![[AFTER_VECTOR_10]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_10]] = distinct !{![[LOOP_10]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_2:.*]], ![[FOLLOWUP_VECTOR_3]]}
-// CHECK: ![[LOOP_11]] = distinct !{![[LOOP_11]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[FOLLOWUP_VECTOR_11:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_11]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_11:.*]]}
-// CHECK: ![[AFTER_VECTOR_11]] = distinct !{![[AFTER_VECTOR_11]], ![[ISVECTORIZED:.*]], ![[UNROLL_8:.*]]}
+// CHECK: ![[LOOP_11]] = distinct !{![[LOOP_11]], ![[WIDTH_2:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_4:.*]], ![[FOLLOWUP_VECTOR_3]]}
// CHECK: ![[LOOP_12]] = distinct !{![[LOOP_12]], ![[WIDTH_6:.*]], ![[FIXED_VEC]], ![[INTERLEAVE_10:.*]], ![[FOLLOWUP_VECTOR_12:.*]]}
-// CHECK: ![[FOLLOWUP_VECTOR_12]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_12:.*]]}
-// CHECK: ![[AFTER_VECTOR_12]] = distinct !{![[AFTER_VECTOR_12]], ![[ISVECTORIZED:.*]], ![[UNROLL_24:.*]]}
+// CHECK: ![[FOLLOWUP_VECTOR_12]] = !{!"llvm.loop.vectorize.followup_all", ![[ISVECTORIZED:.*]], ![[UNROLL_24:.*]]}
// CHECK: ![[UNROLL_24]] = !{!"llvm.loop.unroll.count", i32 24}
// CHECK: ![[LOOP_13]] = distinct !{![[LOOP_13]], ![[WIDTH_8:.*]], ![[INTERLEAVE_16:.*]], ![[VECTORIZE_ENABLE]], ![[FOLLOWUP_VECTOR_13:.*]]}
// CHECK: ![[INTERLEAVE_16]] = !{!"llvm.loop.interleave.count", i32 16}
-// CHECK: ![[FOLLOWUP_VECTOR_13]] = !{!"llvm.loop.vectorize.followup_all", ![[AFTER_VECTOR_13:.*]]}
-// CHECK: ![[AFTER_VECTOR_13]] = distinct !{![[AFTER_VECTOR_13]], ![[ISVECTORIZED:.*]], ![[UNROLL_32:.*]]}
+// CHECK: ![[FOLLOWUP_VECTOR_13]] = !{!"llvm.loop.vectorize.followup_all", ![[ISVECTORIZED:.*]], ![[UNROLL_32:.*]]}
// CHECK: ![[UNROLL_32]] = !{!"llvm.loop.unroll.count", i32 32}
// CHECK: ![[LOOP_14]] = distinct !{![[LOOP_14]], [[MP]], ![[WIDTH_10:.*]], ![[FIXED_VEC]], ![[VECTORIZE_ENABLE]]}
diff --git a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
index fa5c206547a07..1d633dd6a4e04 100644
--- a/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
+++ b/llvm/test/Transforms/LoopVectorize/make-followup-loop-id.ll
@@ -12,9 +12,6 @@
; }
; }
;
-; FIXME: Currently unrolling is not applied. This is because the new Loop ID
-; created after vectorization does not directly contain unroll metadata.
-; Unexpected nests have been created.
define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-LABEL: define void @f(
; CHECK-SAME: ptr noundef captures(none) [[A:%.*]], float noundef [[X:%.*]]) {
@@ -25,14 +22,47 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
-; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[INDEX_NEXT_6:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_6]]
-; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[TMP14]], i32 0
-; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[WIDE_LOAD_7:%.*]] = load <4 x float>, ptr [[TMP14]], align 4
; CHECK-NEXT: [[TMP15:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_7]]
-; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP2]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: store <4 x float> [[TMP15]], ptr [[TMP14]], align 4
+; CHECK-NEXT: [[INDEX_NEXT1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 4
+; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT1]]
+; CHECK-NEXT: [[WIDE_LOAD_1:%.*]] = load <4 x float>, ptr [[TMP2]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_1]]
+; CHECK-NEXT: store <4 x float> [[TMP3]], ptr [[TMP2]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_1:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 8
+; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_1]]
+; CHECK-NEXT: [[WIDE_LOAD_2:%.*]] = load <4 x float>, ptr [[TMP16]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_2]]
+; CHECK-NEXT: store <4 x float> [[TMP5]], ptr [[TMP16]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_2:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 12
+; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_2]]
+; CHECK-NEXT: [[WIDE_LOAD_3:%.*]] = load <4 x float>, ptr [[TMP6]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_3]]
+; CHECK-NEXT: store <4 x float> [[TMP7]], ptr [[TMP6]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_3:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 16
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_3]]
+; CHECK-NEXT: [[WIDE_LOAD_4:%.*]] = load <4 x float>, ptr [[TMP8]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_4]]
+; CHECK-NEXT: store <4 x float> [[TMP9]], ptr [[TMP8]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_4:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 20
+; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_4]]
+; CHECK-NEXT: [[WIDE_LOAD_5:%.*]] = load <4 x float>, ptr [[TMP10]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_5]]
+; CHECK-NEXT: store <4 x float> [[TMP11]], ptr [[TMP10]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_5:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 24
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_5]]
+; CHECK-NEXT: [[WIDE_LOAD_6:%.*]] = load <4 x float>, ptr [[TMP12]], align 4
+; CHECK-NEXT: [[TMP13:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_6]]
+; CHECK-NEXT: store <4 x float> [[TMP13]], ptr [[TMP12]], align 4
+; CHECK-NEXT: [[INDEX_NEXT_7:%.*]] = add nuw nsw i64 [[INDEX_NEXT_6]], 28
+; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[INDEX_NEXT_7]]
+; CHECK-NEXT: [[WIDE_LOAD_8:%.*]] = load <4 x float>, ptr [[TMP17]], align 4
+; CHECK-NEXT: [[TMP18:%.*]] = fmul <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD_8]]
+; CHECK-NEXT: store <4 x float> [[TMP18]], ptr [[TMP17]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX_NEXT_6]], 32
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
; CHECK-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
@@ -41,14 +71,49 @@ define void @f(ptr noundef captures(none) %a, float noundef %x) {
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT_7:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV]]
; CHECK-NEXT: [[LOAD:%.*]] = load float, ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X]], [[LOAD]]
; CHECK-NEXT: store float [[MUL]], ptr [[ARRAYIDX]], align 4
-; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
-; CHECK-NEXT: [[COMP:%.*]] = icmp eq i64 [[IV_NEXT]], 1024
-; CHECK-NEXT: br i1 [[COMP]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK-NEXT: [[IV_NEXT:%.*]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[ARRAYIDX_1:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT]]
+; CHECK-NEXT: [[LOAD_1:%.*]] = load float, ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[MUL_1:%.*]] = fmul float [[X]], [[LOAD_1]]
+; CHECK-NEXT: store float [[MUL_1]], ptr [[ARRAYIDX_1]], align 4
+; CHECK-NEXT: [[IV_NEXT_1:%.*]] = add nuw nsw i64 [[IV]], 2
+; CHECK-NEXT: [[ARRAYIDX_2:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_1]]
+; CHECK-NEXT: [[LOAD_2:%.*]] = load float, ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[MUL_2:%.*]] = fmul float [[X]], [[LOAD_2]]
+; CHECK-NEXT: store float [[MUL_2]], ptr [[ARRAYIDX_2]], align 4
+; CHECK-NEXT: [[IV_NEXT_2:%.*]] = add nuw nsw i64 [[IV]], 3
+; CHECK-NEXT: [[ARRAYIDX_3:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_2]]
+; CHECK-NEXT: [[LOAD_3:%.*]] = load float, ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[MUL_3:%.*]] = fmul float [[X]], [[LOAD_3]]
+; CHECK-NEXT: store float [[MUL_3]], ptr [[ARRAYIDX_3]], align 4
+; CHECK-NEXT: [[IV_NEXT_3:%.*]] = add nuw nsw i64 [[IV]], 4
+; CHECK-NEXT: [[ARRAYIDX_4:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_3]]
+; CHECK-NEXT: [[LOAD_4:%.*]] = load float, ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[MUL_4:%.*]] = fmul float [[X]], [[LOAD_4]]
+; CHECK-NEXT: store float [[MUL_4]], ptr [[ARRAYIDX_4]], align 4
+; CHECK-NEXT: [[IV_NEXT_4:%.*]] = add nuw nsw i64 [[IV]], 5
+; CHECK-NEXT: [[ARRAYIDX_5:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_4]]
+; CHECK-NEXT: [[LOAD_5:%.*]] = load float, ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[MUL_5:%.*]] = fmul float [[X]], [[LOAD_5]]
+; CHECK-NEXT: store float [[MUL_5]], ptr [[ARRAYIDX_5]], align 4
+; CHECK-NEXT: [[IV_NEXT_5:%.*]] = add nuw nsw i64 [[IV]], 6
+; CHECK-NEXT: [[ARRAYIDX_6:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_5]]
+; CHECK-NEXT: [[LOAD_6:%.*]] = load float, ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[MUL_6:%.*]] = fmul float [[X]], [[LOAD_6]]
+; CHECK-NEXT: store float [[MUL_6]], ptr [[ARRAYIDX_6]], align 4
+; CHECK-NEXT: [[IV_NEXT_6:%.*]] = add nuw nsw i64 [[IV]], 7
+; CHECK-NEXT: [[ARRAYIDX_7:%.*]] = getelementptr inbounds nuw float, ptr [[A]], i64 [[IV_NEXT_6]]
+; CHECK-NEXT: [[LOAD_7:%.*]] = load float, ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[MUL_7:%.*]] = fmul float [[X]], [[LOAD_7]]
+; CHECK-NEXT: store float [[MUL_7]], ptr [[ARRAYIDX_7]], align 4
+; CHECK-NEXT: [[IV_NEXT_7]] = add nuw nsw i64 [[IV]], 8
+; CHECK-NEXT: [[COMP_7:%.*]] = icmp eq i64 [[IV_NEXT_7]], 1024
+; CHECK-NEXT: br i1 [[COMP_7]], label %[[EXIT_LOOPEXIT:.*]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
; CHECK: [[EXIT_LOOPEXIT]]:
; CHECK-NEXT: br label %[[EXIT]]
; CHECK: [[EXIT]]:
@@ -73,15 +138,12 @@ exit:
!0 = distinct !{!0, !1, !2}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}
-!2 = !{!"llvm.loop.vectorize.followup_all", !3}
-!3 = distinct !{!3, !4, !5}
-!4 = !{!"llvm.loop.isvectorized"}
-!5 = !{!"llvm.loop.unroll.count", i32 8}
+!2 = !{!"llvm.loop.vectorize.followup_all", !3, !4}
+!3 = !{!"llvm.loop.isvectorized"}
+!4 = !{!"llvm.loop.unroll.count", i32 8}
;.
-; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META4:![0-9]+]]}
-; CHECK: [[META1]] = distinct !{[[META1]], [[META2:![0-9]+]], [[META3:![0-9]+]]}
-; CHECK: [[META2]] = !{!"llvm.loop.isvectorized"}
-; CHECK: [[META3]] = !{!"llvm.loop.unroll.count", i32 8}
-; CHECK: [[META4]] = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META1]]}
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized"}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META1]], [[META2]]}
;.
More information about the llvm-commits
mailing list