[llvm] [LoopUnswitch] Allow i1 truncs in loop unswitch (PR #89738)

Matthew Devereau via llvm-commits llvm-commits at lists.llvm.org
Tue Apr 23 04:09:49 PDT 2024


https://github.com/MDevereau created https://github.com/llvm/llvm-project/pull/89738

With the addition of #84628, truncs to i1 are being emitted as conditions to branch instructions. This caused significant regressions in cases which were previously improved by loop unswitch. Adding truncs to i1 restore the previous performance seen.

>From 0a751530a2546d51125b841e1ca3e2131b168ef1 Mon Sep 17 00:00:00 2001
From: Matt Devereau <matthew.devereau at arm.com>
Date: Mon, 22 Apr 2024 13:28:44 +0000
Subject: [PATCH] [LoopUnswitch] Allow i1 truncs in loop unswitch

With the addition of #84628, truncs to i1 are being
emitted as conditions to branch instructions. This caused
significant regressions in cases which were previously improved by
loop unswitch. Adding truncs to i1 restore the previous performance
seen.
---
 llvm/lib/Transforms/Utils/LoopUtils.cpp       |  10 +-
 .../SimpleLoopUnswitch/endless-unswitch.ll    |  93 +++++++++++++
 .../SimpleLoopUnswitch/partial-unswitch.ll    | 130 ++++++++++++++++++
 3 files changed, 232 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/Utils/LoopUtils.cpp b/llvm/lib/Transforms/Utils/LoopUtils.cpp
index 73c5d636782294..e10c5dcbd218aa 100644
--- a/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/llvm/lib/Transforms/Utils/LoopUtils.cpp
@@ -1930,7 +1930,15 @@ llvm::hasPartialIVCondition(const Loop &L, unsigned MSSAThreshold,
   if (!TI || !TI->isConditional())
     return {};
 
-  auto *CondI = dyn_cast<CmpInst>(TI->getCondition());
+  Instruction *CondI = nullptr;
+  CondI = dyn_cast<CmpInst>(TI->getCondition());
+
+  if (!CondI) {
+    CondI = dyn_cast<TruncInst>(TI->getCondition());
+    if (CondI && CondI->getType() != Type::getInt1Ty(TI->getContext())) {
+      return {};
+    }
+  }
   // The case with the condition outside the loop should already be handled
   // earlier.
   if (!CondI || !L.contains(CondI))
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll
index 0d3aa8b243109e..a5ad182ad0b3e0 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/endless-unswitch.ll
@@ -106,3 +106,96 @@ for.inc:                                          ; preds = %for.cond5
   store i8 0, ptr @b, align 1
   br label %for.cond5
 }
+
+define void @e() {
+; CHECK-LABEL: @e(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i16 [[TMP0]] to i1
+; CHECK-NEXT:    br i1 [[TMP1]], label [[FOR_END_SPLIT:%.*]], label [[FOR_END_SPLIT_US:%.*]]
+; CHECK:       for.end.split.us:
+; CHECK-NEXT:    br label [[G_US:%.*]]
+; CHECK:       g.us:
+; CHECK-NEXT:    br label [[G_SPLIT_US6:%.*]]
+; CHECK:       for.cond1.us1:
+; CHECK-NEXT:    [[TMP2:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[TOBOOL4_NOT_US:%.*]] = trunc i16 [[TMP2]] to i1
+; CHECK-NEXT:    br i1 [[TOBOOL4_NOT_US]], label [[FOR_COND5_PREHEADER_US4:%.*]], label [[G_LOOPEXIT_US:%.*]]
+; CHECK:       for.cond5.us2:
+; CHECK-NEXT:    br i1 false, label [[FOR_COND1_LOOPEXIT_US5:%.*]], label [[FOR_INC_US3:%.*]]
+; CHECK:       for.inc.us3:
+; CHECK-NEXT:    store i8 0, ptr @b, align 1
+; CHECK-NEXT:    br label [[FOR_COND5_US2:%.*]]
+; CHECK:       for.cond5.preheader.us4:
+; CHECK-NEXT:    br label [[FOR_COND5_US2]]
+; CHECK:       for.cond1.loopexit.us5:
+; CHECK-NEXT:    br label [[FOR_COND1_US1:%.*]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK:       g.loopexit.us:
+; CHECK-NEXT:    br label [[G_US]]
+; CHECK:       g.split.us6:
+; CHECK-NEXT:    br label [[FOR_COND1_US1]]
+; CHECK:       for.end.split:
+; CHECK-NEXT:    br label [[G:%.*]]
+; CHECK:       g.loopexit:
+; CHECK-NEXT:    br label [[G]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK:       g:
+; CHECK-NEXT:    [[TMP3:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[TMP4:%.*]] = trunc i16 [[TMP3]] to i1
+; CHECK-NEXT:    br i1 [[TMP4]], label [[G_SPLIT_US:%.*]], label [[G_SPLIT:%.*]]
+; CHECK:       g.split.us:
+; CHECK-NEXT:    br label [[FOR_COND1_US:%.*]]
+; CHECK:       for.cond1.us:
+; CHECK-NEXT:    br label [[FOR_COND5_PREHEADER_US:%.*]]
+; CHECK:       for.cond5.us:
+; CHECK-NEXT:    br i1 false, label [[FOR_COND1_LOOPEXIT_US:%.*]], label [[FOR_INC_US:%.*]]
+; CHECK:       for.inc.us:
+; CHECK-NEXT:    store i8 0, ptr @b, align 1
+; CHECK-NEXT:    br label [[FOR_COND5_US:%.*]]
+; CHECK:       for.cond5.preheader.us:
+; CHECK-NEXT:    br label [[FOR_COND5_US]]
+; CHECK:       for.cond1.loopexit.us:
+; CHECK-NEXT:    br label [[FOR_COND1_US]]
+; CHECK:       g.split:
+; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
+; CHECK:       for.cond1.loopexit:
+; CHECK-NEXT:    br label [[FOR_COND1]], !llvm.loop [[LOOP3]]
+; CHECK:       for.cond1:
+; CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr null, align 2
+; CHECK-NEXT:    [[TOBOOL4_NOT:%.*]] = trunc i16 [[TMP5]] to i1
+; CHECK-NEXT:    br i1 [[TOBOOL4_NOT]], label [[FOR_COND5_PREHEADER:%.*]], label [[G_LOOPEXIT:%.*]]
+; CHECK:       for.cond5.preheader:
+; CHECK-NEXT:    br label [[FOR_COND5:%.*]]
+; CHECK:       for.cond5:
+; CHECK-NEXT:    br i1 false, label [[FOR_COND1_LOOPEXIT:%.*]], label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    store i8 0, ptr @b, align 1
+; CHECK-NEXT:    br label [[FOR_COND5]]
+;
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  br i1 false, label %for.end, label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  br label %g
+
+g:                                                ; preds = %for.cond1, %for.end
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.cond5, %g
+  %0 = load i16, ptr null, align 2
+  %tobool4.not = trunc i16 %0 to i1
+  br i1 %tobool4.not, label %for.cond5, label %g
+
+for.cond5:                                        ; preds = %for.inc, %for.cond1
+  br i1 false, label %for.cond1, label %for.inc
+
+for.inc:                                          ; preds = %for.cond5
+  store i8 0, ptr @b, align 1
+  br label %for.cond5
+}
diff --git a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
index f97e5c3eec9d46..1d8942079ffd81 100644
--- a/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
+++ b/llvm/test/Transforms/SimpleLoopUnswitch/partial-unswitch.ll
@@ -1326,6 +1326,136 @@ exit:
   ret i32 10
 }
 
+define i32 @partial_unswitch_true_successor_trunc(ptr %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_true_successor_trunc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT_US:%.*]], label [[ENTRY_SPLIT:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = trunc i32 [[LV]] to i1
+; CHECK-NEXT:    br i1 [[SC]], label [[NOCLOBBER:%.*]], label [[CLOBBER:%.*]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, ptr %ptr
+  %sc = trunc i32 %lv to i1
+  br i1 %sc, label %noclobber, label %clobber
+
+noclobber:
+  br label %loop.latch
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
+define i32 @partial_unswitch_false_successor_trunc(ptr %ptr, i32 %N) {
+; CHECK-LABEL: @partial_unswitch_false_successor_trunc(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[TMP0]] to i1
+; CHECK-NEXT:    br i1 [[TMP1]], label [[ENTRY_SPLIT:%.*]], label [[ENTRY_SPLIT_US:%.*]]
+; CHECK:       entry.split.us:
+; CHECK-NEXT:    br label [[LOOP_HEADER_US:%.*]]
+; CHECK:       loop.header.us:
+; CHECK-NEXT:    [[IV_US:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT_US]] ], [ [[IV_NEXT_US:%.*]], [[LOOP_LATCH_US:%.*]] ]
+; CHECK-NEXT:    br label [[NOCLOBBER_US:%.*]]
+; CHECK:       noclobber.us:
+; CHECK-NEXT:    br label [[LOOP_LATCH_US]]
+; CHECK:       loop.latch.us:
+; CHECK-NEXT:    [[C_US:%.*]] = icmp ult i32 [[IV_US]], [[N:%.*]]
+; CHECK-NEXT:    [[IV_NEXT_US]] = add i32 [[IV_US]], 1
+; CHECK-NEXT:    br i1 [[C_US]], label [[LOOP_HEADER_US]], label [[EXIT_SPLIT_US:%.*]]
+; CHECK:       exit.split.us:
+; CHECK-NEXT:    br label [[EXIT:%.*]]
+; CHECK:       entry.split:
+; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
+; CHECK:       loop.header:
+; CHECK-NEXT:    [[IV:%.*]] = phi i32 [ 0, [[ENTRY_SPLIT]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
+; CHECK-NEXT:    [[LV:%.*]] = load i32, ptr [[PTR]], align 4
+; CHECK-NEXT:    [[SC:%.*]] = trunc i32 [[LV]] to i1
+; CHECK-NEXT:    br i1 [[SC]], label [[CLOBBER:%.*]], label [[NOCLOBBER:%.*]]
+; CHECK:       clobber:
+; CHECK-NEXT:    call void @clobber()
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       noclobber:
+; CHECK-NEXT:    br label [[LOOP_LATCH]]
+; CHECK:       loop.latch:
+; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IV]], [[N]]
+; CHECK-NEXT:    [[IV_NEXT]] = add i32 [[IV]], 1
+; CHECK-NEXT:    br i1 [[C]], label [[LOOP_HEADER]], label [[EXIT_SPLIT:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK:       exit.split:
+; CHECK-NEXT:    br label [[EXIT]]
+; CHECK:       exit:
+; CHECK-NEXT:    ret i32 10
+;
+entry:
+  br label %loop.header
+
+loop.header:
+  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
+  %lv = load i32, ptr %ptr
+  %sc = trunc i32 %lv to i1
+  br i1 %sc, label %clobber, label %noclobber
+
+clobber:
+  call void @clobber()
+  br label %loop.latch
+
+noclobber:
+  br label %loop.latch
+
+loop.latch:
+  %c = icmp ult i32 %iv, %N
+  %iv.next = add i32 %iv, 1
+  br i1 %c, label %loop.header, label %exit
+
+exit:
+  ret i32 10
+}
+
 ; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[UNSWITCH_PARTIAL_DISABLE:![0-9]+]]}
 ; CHECK: [[UNSWITCH_PARTIAL_DISABLE]] = !{!"llvm.loop.unswitch.partial.disable"}
 ; CHECK: [[LOOP2]] = distinct !{[[LOOP2]], [[UNSWITCH_PARTIAL_DISABLE]]}



More information about the llvm-commits mailing list