[llvm] [LoopUnswitch] Allow i1 truncs in loop unswitch (PR #89738)

Matthew Devereau via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 06:26:17 PDT 2024


https://github.com/MDevereau updated https://github.com/llvm/llvm-project/pull/89738

>From 0a751530a2546d51125b841e1ca3e2131b168ef1 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Mon, 22 Apr 2024 13:28:44 +0000
Subject: [PATCH 1/2] [LoopUnswitch] Allow i1 truncs in loop unswitch

Since #84628, truncs to i1 are being emitted as the conditions of
branch instructions. This caused significant regressions in cases
which were previously improved by loop unswitch. Accepting a trunc
to i1 as the branch condition in hasPartialIVCondition restores the
previous performance.
---
 llvm/lib/Transforms/Utils/LoopUtils.cpp       |  10 +-
 .../SimpleLoopUnswitch/endless-unswitch.ll    |  93 +++++++++++++
 .../SimpleLoopUnswitch/partial-unswitch.ll    | 130 ++++++++++++++++++
 3 files changed, 232 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 73c5d636782294..e10c5dcbd218aa 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1930,7 +1930,15 @@ llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
   if (!TI || !TI->isConditional())
     return {};
 
-  auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+  Instruction *CondI = nullptr;
+  CondI = dyn_cast<CmpInst>(TI->getCondition());
+
+  if (!CondI) {
+    CondI = dyn_cast<TruncInst>(TI->getCondition());
+    if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext())) {
+      return {};
+    }
+  }
   // The case with the condition outside the loop should already be handled
   // earlier.
   if (!CondI || !L.contains(CondI))
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll
index 0d3aa8b243109e..a5ad182ad0b3e0 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll
@@ -106,3 +106,96 @@ for.inc:                                          ; preds = %for.cond5
   store i8 0, ptr @b, align 1
   br label %for.cond5
 }
+
+define void @e() {
+; CHECK-LABEL: @e(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i16 [[TMP0]] to i1
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_END_SPLIT:%.*]], label [[FOR_END_SPLIT_US:%.*]]
+; CHECK:       for.end.split.us:
+; CHECK-NEXT:    br label [[G_US:%.*]]
+; CHECK:       g.us:
+; CHECK-NEXT:    br label [[G_SPLIT_US6:%.*]]
+; CHECK:       for.cond1.us1:
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[TOBOOL4_NOT_US:%.*]] = trunc i16 [[TMP2]] to i1
+; CHECK-NEXT:    br i1 [[TOBOOL4_NOT_US]], label [[FOR_COND5_PREHEADER_US4:%.*]], label [[G_LOOPEXIT_US:%.*]]
+; CHECK:       for.cond5.us2:
+; CHECK-NEXT:    br i1 false, label [[FOR_COND1_LOOPEXIT_US5:%.*]], label [[FOR_INC_US3:%.*]]
+; CHECK:       for.inc.us3:
+; CHECK-NEXT:    store i8 0, ptr @b, align 1
+; CHECK-NEXT:    br label [[FOR_COND5_US2:%.*]]
+; CHECK:       for.cond5.preheader.us4:
+; CHECK-NEXT:    br label [[FOR_COND5_US2]]
+; CHECK:       for.cond1.loopexit.us5:
+; CHECK-NEXT:    br label [[FOR_COND1_US1:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       g.loopexit.us:
+; CHECK-NEXT:    br label [[G_US]]
+; CHECK:       g.split.us6:
+; CHECK-NEXT:    br label [[FOR_COND1_US1]]
+; CHECK:       for.end.split:
+; CHECK-NEXT:    br label [[G:%.*]]
+; CHECK:       g.loopexit:
+; CHECK-NEXT:    br label [[G]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       g:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i16 [[TMP3]] to i1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[G_SPLIT_US:%.*]], label [[G_SPLIT:%.*]]
+; CHECK:       g.split.us:
+; CHECK-NEXT:    br label [[FOR_COND1_US:%.*]]
+; CHECK:       for.cond1.us:
+; CHECK-NEXT:    br label [[FOR_COND5_PREHEADER_US:%.*]]
+; CHECK:       for.cond5.us:
+; CHECK-NEXT:    br i1 false, label [[FOR_COND1_LOOPEXIT_US:%.*]], label [[FOR_INC_US:%.*]]
+; CHECK:       for.inc.us:
+; CHECK-NEXT:    store i8 0, ptr @b, align 1
+; CHECK-NEXT:    br label [[FOR_COND5_US:%.*]]
+; CHECK:       for.cond5.preheader.us:
+; CHECK-NEXT:    br label [[FOR_COND5_US]]
+; CHECK:       for.cond1.loopexit.us:
+; CHECK-NEXT:    br label [[FOR_COND1_US]]
+; CHECK:       g.split:
+; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
+; CHECK:       for.cond1.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP3]]
+; CHECK:       for.cond1:
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[TOBOOL4_NOT:%.*]] = trunc i16 [[TMP5]] to i1
+; CHECK-NEXT:    br i1 [[TOBOOL4_NOT]], label [[FOR_COND5_PREHEADER:%.*]], label [[G_LOOPEXIT:%.*]]
+; CHECK:       for.cond5.preheader:
+; CHECK-NEXT:    br label [[FOR_COND5:%.*]]
+; CHECK:       for.cond5:
+; CHECK-NEXT:    br i1 false, label [[FOR_COND1_LOOPEXIT:%.*]], label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    store i8 0, ptr @b, align 1
+; CHECK-NEXT:    br label [[FOR_COND5]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  br i1 false, label %for.end, label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  br label %g
+
+g:                                                ; preds = %for.cond1, %for.end
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.cond5, %g
+  %0 = load i16, ptr null, align 2
+  %tobool4.not = trunc i16 %0 to i1
+  br i1 %tobool4.not, label %for.cond5, label %g
+
+for.cond5:                                        ; preds = %for.inc, %for.cond1
+  br i1 false, label %for.cond1, label %for.inc
+
+for.inc:                                          ; preds = %for.cond5
+  store i8 0, ptr @b, align 1
+  br label %for.cond5
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index f97e5c3eec9d46..1d8942079ffd81 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1326,6 +1326,136 @@ exit:
   ret i32 10
 }
 
+define i32 @partial_unswitch_true_successor_trunc(ptr %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_true_successor_trunc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = trunc i32 [[LV]] to i1
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, ptr %ptr
+  %sc = trunc i32 %lv to i1
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+define i32 @partial_unswitch_false_successor_trunc(ptr %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_false_successor_trunc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = trunc i32 [[LV]] to i1
+; CHECK-NEXT:    br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, ptr %ptr
+  %sc = trunc i32 %lv to i1
+  br i1 %sc, label %clobber, label %noclobber
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+noclobber:
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
 ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[UNSWITCH_PARTIAL_DISABLE:![0-9]+]]}
 ; CHECK: [[UNSWITCH_PARTIAL_DISABLE]] = !{!"llvm.loop.unswitch.partial.disable"}
 ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[UNSWITCH_PARTIAL_DISABLE]]}

>From 3763f4f2af1072bab25ff1d9c906e28271b3c8b1 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Tue, 23 Apr 2024 12:58:52 +0000
Subject: [PATCH 2/2] Format assignment and if statement

---
 llvm/lib/Transforms/Utils/LoopUtils.cpp | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index e10c5dcbd218aa..6799e3973fbf00 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1930,15 +1930,13 @@ llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
   if (!TI || !TI->isConditional())
     return {};
 
-  Instruction *CondI = nullptr;
-  CondI = dyn_cast<CmpInst>(TI->getCondition());
-
-  if (!CondI) {
+  Instruction *CondI = dyn_cast<CmpInst>(TI->getCondition());
+  if (!CondI){
     CondI = dyn_cast<TruncInst>(TI->getCondition());
-    if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext())) {
+    if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext()))
       return {};
-    }
   }
+
   // The case with the condition outside the loop should already be handled
   // earlier.
   if (!CondI || !L.contains(CondI))



More information about the llvm-commits mailing list