[llvm] 6b03ce3 - [LICM] Simplify (X < A && X < B) into (X < MIN(A, B)) if MIN(A, B) is loop-invariant
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Fri Mar 10 02:37:00 PST 2023
Author: Max Kazantsev
Date: 2023-03-10T17:36:52+07:00
New Revision: 6b03ce374e0dc64868b4b6665056dfc3fda0e98f
URL: https://github.com/llvm/llvm-project/commit/6b03ce374e0dc64868b4b6665056dfc3fda0e98f
DIFF: https://github.com/llvm/llvm-project/commit/6b03ce374e0dc64868b4b6665056dfc3fda0e98f.diff
LOG: [LICM] Simplify (X < A && X < B) into (X < MIN(A, B)) if MIN(A, B) is loop-invariant
We don't do this transform in InstCombine in the general case for arbitrary values, because the
cost of an AND and two ICMPs isn't higher than that of a MIN and an ICMP. However, LICM also has
knowledge of the loop structure. This transform becomes profitable if `A` and `B` are loop-invariant
and `X` is not: by doing this, we can compute the min outside the loop.
Differential Revision: https://reviews.llvm.org/D143726
Reviewed By: nikic
Added:
Modified:
llvm/lib/Transforms/Scalar/LICM.cpp
llvm/test/CodeGen/AMDGPU/wave32.ll
llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
llvm/test/Transforms/LICM/min_max.ll
Removed:
################################################################################
diff --git a/llvm/lib/Transforms/Scalar/LICM.cpp b/llvm/lib/Transforms/Scalar/LICM.cpp
index 376f20a5a6131..78b05ba07081a 100644
--- a/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/llvm/lib/Transforms/Scalar/LICM.cpp
@@ -68,6 +68,7 @@
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
+#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/PatternMatch.h"
@@ -102,6 +103,8 @@ STATISTIC(NumMovedCalls, "Number of call insts hoisted or sunk");
STATISTIC(NumPromotionCandidates, "Number of promotion candidates");
STATISTIC(NumLoadPromoted, "Number of load-only promotions");
STATISTIC(NumLoadStorePromoted, "Number of load and store promotions");
+// Incremented each time hoistMinMax() reports a successful rewrite.
+STATISTIC(NumMinMaxHoisted,
+          "Number of min/max expressions hoisted out of the loop");
/// Memory promotion is enabled by default.
static cl::opt<bool>
@@ -167,6 +170,11 @@ static bool pointerInvalidatedByLoop(MemorySSA *MSSA, MemoryUse *MU,
bool InvariantGroup);
static bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA,
MemoryUse &MU);
+/// Try to simplify things like (X < INV_1 AND X < INV_2) into (X <
+/// min(INV_1, INV_2)), if INV_1 and INV_2 are both loop invariants and their
+/// minimum can be computed outside of loop, and X is not a loop-invariant.
+static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
+ MemorySSAUpdater &MSSAU);
static Instruction *cloneInstructionInExitBlock(
Instruction &I, BasicBlock &ExitBlock, PHINode &PN, const LoopInfo *LI,
const LoopSafetyInfo *SafetyInfo, MemorySSAUpdater &MSSAU);
@@ -981,6 +989,15 @@ bool llvm::hoistRegion(DomTreeNode *N, AAResults *AA, LoopInfo *LI,
}
}
+ // Optimize complex patterns, such as (x < INV1 && x < INV2), turning them
+ // into (x < min(INV1, INV2)), and hoisting the invariant part of this
+ // expression out of the loop.
+ if (hoistMinMax(I, *CurLoop, *SafetyInfo, MSSAU)) {
+ ++NumMinMaxHoisted;
+ Changed = true;
+ continue;
+ }
+
// Remember possibly hoistable branches so we can actually hoist them
// later if needed.
if (BranchInst *BI = dyn_cast<BranchInst>(&I))
@@ -2396,6 +2413,68 @@ bool pointerInvalidatedByBlock(BasicBlock &BB, MemorySSA &MSSA, MemoryUse &MU) {
return false;
}
+/// Rewrite `(X pred INV_1) and/or (X pred INV_2)` inside loop \p L into a
+/// single `X pred minmax(INV_1, INV_2)`, emitting the min/max intrinsic once in
+/// the loop preheader.
+///
+/// \param I          Candidate bitwise `and`/`or` instruction.
+/// \param L          Loop used to classify operands as invariant or variant.
+/// \param SafetyInfo Passed to eraseInstruction() for the replaced
+///                   instructions.
+/// \param MSSAU      Passed to eraseInstruction() for the replaced
+///                   instructions.
+/// \returns true if the rewrite happened (\p I and both compares are erased),
+///          false if the pattern did not match and nothing was changed.
+static bool hoistMinMax(Instruction &I, Loop &L, ICFLoopSafetyInfo &SafetyInfo,
+                        MemorySSAUpdater &MSSAU) {
+  bool Inverse = false;
+  using namespace PatternMatch;
+  Value *Cond1, *Cond2;
+  // `or` is handled via De Morgan: (A || B) == !(!A && !B). Both compares are
+  // analysed with inverted predicates and the final predicate is inverted
+  // back before the new compare is emitted.
+  if (match(&I, m_Or(m_Value(Cond1), m_Value(Cond2))))
+    Inverse = true;
+  else if (!match(&I, m_And(m_Value(Cond1), m_Value(Cond2))))
+    return false;
+
+  auto MatchICmpAgainstInvariant = [&](Value *C, ICmpInst::Predicate &P,
+                                       Value *&LHS, Value *&RHS) {
+    // The compare is erased below, so it must have no user other than I.
+    if (!match(C, m_OneUse(m_ICmp(P, m_Value(LHS), m_Value(RHS)))))
+      return false;
+    // Relational icmp also accepts pointer operands, but the min/max
+    // intrinsics only take integers and vectors of integers.
+    if (!LHS->getType()->isIntOrIntVectorTy())
+      return false;
+    if (!ICmpInst::isRelational(P))
+      return false;
+    // Canonicalize to `variant pred invariant`.
+    if (L.isLoopInvariant(LHS)) {
+      std::swap(LHS, RHS);
+      P = ICmpInst::getSwappedPredicate(P);
+    }
+    // Exactly one side may be loop-invariant, and it is now the RHS.
+    if (L.isLoopInvariant(LHS) || !L.isLoopInvariant(RHS))
+      return false;
+    if (Inverse)
+      P = ICmpInst::getInversePredicate(P);
+    return true;
+  };
+  ICmpInst::Predicate P1, P2;
+  Value *LHS1, *LHS2, *RHS1, *RHS2;
+  if (!MatchICmpAgainstInvariant(Cond1, P1, LHS1, RHS1) ||
+      !MatchICmpAgainstInvariant(Cond2, P2, LHS2, RHS2))
+    return false;
+  // Both compares must test the same variant value with the same predicate.
+  if (P1 != P2 || LHS1 != LHS2)
+    return false;
+
+  // Everything is fine, we can do the transform.
+  bool UseMin = ICmpInst::isLT(P1) || ICmpInst::isLE(P1);
+  assert(
+      (UseMin || ICmpInst::isGT(P1) || ICmpInst::isGE(P1)) &&
+      "Relational predicate is either less (or equal) or greater (or equal)!");
+  Intrinsic::ID ID = ICmpInst::isSigned(P1)
+                         ? (UseMin ? Intrinsic::smin : Intrinsic::smax)
+                         : (UseMin ? Intrinsic::umin : Intrinsic::umax);
+  // Only look the preheader up once the pattern has matched; this function is
+  // called for every instruction visited by hoistRegion().
+  auto *Preheader = L.getLoopPreheader();
+  assert(Preheader && "Loop is not in simplify form?");
+  IRBuilder<> Builder(Preheader->getTerminator());
+  Value *NewRHS = Builder.CreateBinaryIntrinsic(
+      ID, RHS1, RHS2, nullptr, StringRef("invariant.") +
+                                   (ICmpInst::isSigned(P1) ? "s" : "u") +
+                                   (UseMin ? "min" : "max"));
+  Builder.SetInsertPoint(&I);
+  // Undo the De Morgan inversion applied while matching an `or`.
+  ICmpInst::Predicate P = P1;
+  if (Inverse)
+    P = ICmpInst::getInversePredicate(P);
+  Value *NewCond = Builder.CreateICmp(P, LHS1, NewRHS);
+  NewCond->takeName(&I);
+  I.replaceAllUsesWith(NewCond);
+  // Erase I first: it is the only user of both compares.
+  eraseInstruction(I, SafetyInfo, MSSAU);
+  eraseInstruction(*cast<Instruction>(Cond1), SafetyInfo, MSSAU);
+  eraseInstruction(*cast<Instruction>(Cond2), SafetyInfo, MSSAU);
+  return true;
+}
+
/// Little predicate that returns true if the specified basic block is in
/// a subloop of the current one, not the current one itself.
///
diff --git a/llvm/test/CodeGen/AMDGPU/wave32.ll b/llvm/test/CodeGen/AMDGPU/wave32.ll
index e8ac9a1b21fb8..9d06212894a2a 100644
--- a/llvm/test/CodeGen/AMDGPU/wave32.ll
+++ b/llvm/test/CodeGen/AMDGPU/wave32.ll
@@ -521,40 +521,37 @@ define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #
; GFX1032-NEXT: s_cbranch_execz .LBB11_6
; GFX1032-NEXT: ; %bb.1: ; %.preheader
; GFX1032-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX1032-NEXT: v_mov_b32_e32 v1, 0
-; GFX1032-NEXT: s_mov_b32 s3, 1
-; GFX1032-NEXT: ; implicit-def: $sgpr4
+; GFX1032-NEXT: v_min_u32_e32 v1, 0x100, v0
+; GFX1032-NEXT: v_mov_b32_e32 v2, 0
+; GFX1032-NEXT: s_mov_b32 s4, 0
+; GFX1032-NEXT: ; implicit-def: $sgpr3
; GFX1032-NEXT: s_branch .LBB11_4
; GFX1032-NEXT: .LBB11_2: ; %bb8
; GFX1032-NEXT: ; in Loop: Header=BB11_4 Depth=1
-; GFX1032-NEXT: v_cmp_ge_u32_e32 vcc_lo, s3, v0
-; GFX1032-NEXT: s_cmpk_gt_u32 s3, 0xff
-; GFX1032-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1032-NEXT: s_cselect_b32 s5, -1, 0
-; GFX1032-NEXT: s_add_i32 s3, s3, 1
-; GFX1032-NEXT: s_or_b32 s5, s5, vcc_lo
-; GFX1032-NEXT: s_waitcnt_depctr 0xffe3
+; GFX1032-NEXT: s_add_i32 s4, s4, 1
+; GFX1032-NEXT: global_store_dword v2, v0, s[0:1]
+; GFX1032-NEXT: v_cmp_ge_u32_e32 vcc_lo, s4, v1
; GFX1032-NEXT: s_add_u32 s0, s0, 4
; GFX1032-NEXT: s_addc_u32 s1, s1, 0
-; GFX1032-NEXT: s_andn2_b32 s4, s4, exec_lo
-; GFX1032-NEXT: s_and_b32 s5, s5, exec_lo
-; GFX1032-NEXT: s_or_b32 s4, s4, s5
+; GFX1032-NEXT: s_andn2_b32 s3, s3, exec_lo
+; GFX1032-NEXT: s_and_b32 s5, vcc_lo, exec_lo
+; GFX1032-NEXT: s_or_b32 s3, s3, s5
; GFX1032-NEXT: .LBB11_3: ; %Flow
; GFX1032-NEXT: ; in Loop: Header=BB11_4 Depth=1
-; GFX1032-NEXT: s_and_b32 s5, exec_lo, s4
+; GFX1032-NEXT: s_and_b32 s5, exec_lo, s3
; GFX1032-NEXT: s_or_b32 s2, s5, s2
; GFX1032-NEXT: s_andn2_b32 exec_lo, exec_lo, s2
; GFX1032-NEXT: s_cbranch_execz .LBB11_6
; GFX1032-NEXT: .LBB11_4: ; %bb2
; GFX1032-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1032-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1032-NEXT: global_load_dword v2, v1, s[0:1]
-; GFX1032-NEXT: s_or_b32 s4, s4, exec_lo
+; GFX1032-NEXT: global_load_dword v3, v2, s[0:1]
+; GFX1032-NEXT: s_or_b32 s3, s3, exec_lo
; GFX1032-NEXT: s_waitcnt vmcnt(0)
-; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 11, v2
+; GFX1032-NEXT: v_cmp_gt_i32_e32 vcc_lo, 11, v3
; GFX1032-NEXT: s_cbranch_vccz .LBB11_2
; GFX1032-NEXT: ; %bb.5: ; in Loop: Header=BB11_4 Depth=1
-; GFX1032-NEXT: ; implicit-def: $sgpr3
+; GFX1032-NEXT: ; implicit-def: $sgpr4
; GFX1032-NEXT: ; implicit-def: $sgpr0_sgpr1
; GFX1032-NEXT: s_branch .LBB11_3
; GFX1032-NEXT: .LBB11_6: ; %.loopexit
@@ -563,28 +560,25 @@ define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #
; GFX1064-LABEL: test_loop_with_if_else_break:
; GFX1064: ; %bb.0: ; %bb
; GFX1064-NEXT: v_cmp_ne_u32_e32 vcc, 0, v0
+; GFX1064-NEXT: s_mov_b32 s6, 0
; GFX1064-NEXT: s_and_saveexec_b64 s[2:3], vcc
; GFX1064-NEXT: s_cbranch_execz .LBB11_6
; GFX1064-NEXT: ; %bb.1: ; %.preheader
; GFX1064-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
-; GFX1064-NEXT: v_mov_b32_e32 v1, 0
-; GFX1064-NEXT: s_mov_b32 s6, 1
+; GFX1064-NEXT: v_min_u32_e32 v1, 0x100, v0
+; GFX1064-NEXT: v_mov_b32_e32 v2, 0
; GFX1064-NEXT: s_mov_b64 s[2:3], 0
; GFX1064-NEXT: ; implicit-def: $sgpr4_sgpr5
; GFX1064-NEXT: s_branch .LBB11_4
; GFX1064-NEXT: .LBB11_2: ; %bb8
; GFX1064-NEXT: ; in Loop: Header=BB11_4 Depth=1
-; GFX1064-NEXT: v_cmp_ge_u32_e32 vcc, s6, v0
-; GFX1064-NEXT: s_cmpk_gt_u32 s6, 0xff
-; GFX1064-NEXT: global_store_dword v1, v0, s[0:1]
-; GFX1064-NEXT: s_cselect_b64 s[8:9], -1, 0
; GFX1064-NEXT: s_add_i32 s6, s6, 1
-; GFX1064-NEXT: s_or_b64 s[8:9], s[8:9], vcc
-; GFX1064-NEXT: s_waitcnt_depctr 0xffe3
+; GFX1064-NEXT: global_store_dword v2, v0, s[0:1]
+; GFX1064-NEXT: v_cmp_ge_u32_e32 vcc, s6, v1
; GFX1064-NEXT: s_add_u32 s0, s0, 4
; GFX1064-NEXT: s_addc_u32 s1, s1, 0
; GFX1064-NEXT: s_andn2_b64 s[4:5], s[4:5], exec
-; GFX1064-NEXT: s_and_b64 s[8:9], s[8:9], exec
+; GFX1064-NEXT: s_and_b64 s[8:9], vcc, exec
; GFX1064-NEXT: s_or_b64 s[4:5], s[4:5], s[8:9]
; GFX1064-NEXT: .LBB11_3: ; %Flow
; GFX1064-NEXT: ; in Loop: Header=BB11_4 Depth=1
@@ -595,10 +589,10 @@ define amdgpu_kernel void @test_loop_with_if_else_break(ptr addrspace(1) %arg) #
; GFX1064-NEXT: .LBB11_4: ; %bb2
; GFX1064-NEXT: ; =>This Inner Loop Header: Depth=1
; GFX1064-NEXT: s_waitcnt lgkmcnt(0)
-; GFX1064-NEXT: global_load_dword v2, v1, s[0:1]
+; GFX1064-NEXT: global_load_dword v3, v2, s[0:1]
; GFX1064-NEXT: s_or_b64 s[4:5], s[4:5], exec
; GFX1064-NEXT: s_waitcnt vmcnt(0)
-; GFX1064-NEXT: v_cmp_gt_i32_e32 vcc, 11, v2
+; GFX1064-NEXT: v_cmp_gt_i32_e32 vcc, 11, v3
; GFX1064-NEXT: s_cbranch_vccz .LBB11_2
; GFX1064-NEXT: ; %bb.5: ; in Loop: Header=BB11_4 Depth=1
; GFX1064-NEXT: ; implicit-def: $sgpr6
diff --git a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
index 5b8742b6f61af..33dfd2268bd2b 100644
--- a/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
+++ b/llvm/test/CodeGen/PowerPC/sms-cpy-1.ll
@@ -13,18 +13,16 @@ define void @print_res() nounwind {
; CHECK-NEXT: lwz 3, 0(3)
; CHECK-NEXT: addi 3, 3, -1
; CHECK-NEXT: clrldi 4, 3, 32
-; CHECK-NEXT: cmplwi 3, 1
-; CHECK-NEXT: li 3, 1
-; CHECK-NEXT: iselgt 3, 4, 3
-; CHECK-NEXT: li 4, 2
-; CHECK-NEXT: addi 3, 3, -1
-; CHECK-NEXT: cmpldi 3, 2
-; CHECK-NEXT: isellt 3, 3, 4
+; CHECK-NEXT: cmplwi 3, 3
+; CHECK-NEXT: li 3, 3
+; CHECK-NEXT: isellt 3, 4, 3
+; CHECK-NEXT: li 4, 1
+; CHECK-NEXT: cmpldi 3, 1
+; CHECK-NEXT: iselgt 3, 3, 4
; CHECK-NEXT: li 4, 0
-; CHECK-NEXT: addi 3, 3, 1
; CHECK-NEXT: li 5, 0
-; CHECK-NEXT: li 7, -1
; CHECK-NEXT: mtctr 3
+; CHECK-NEXT: li 7, -1
; CHECK-NEXT: lbz 5, 0(5)
; CHECK-NEXT: li 3, 1
; CHECK-NEXT: bdz .LBB0_6
diff --git a/llvm/test/Transforms/LICM/min_max.ll b/llvm/test/Transforms/LICM/min_max.ll
index d47f2d174a0e7..d9710daa1645f 100644
--- a/llvm/test/Transforms/LICM/min_max.ll
+++ b/llvm/test/Transforms/LICM/min_max.ll
@@ -1,16 +1,15 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S %s -passes='loop-mssa(licm)' -verify-memoryssa | FileCheck %s
-; TODO: turn to %iv <u umin(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv <u umin(inv_1, inv_2) and hoist it out of loop.
define i32 @test_ult(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ult(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_UMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -32,16 +31,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv <=u umin(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv <=u umin(inv_1, inv_2) and hoist it out of loop.
define i32 @test_ule(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ule(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ule i32 [[IV]], [[INVARIANT_UMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -63,16 +61,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv <s smin(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv <s smin(inv_1, inv_2) and hoist it out of loop.
define i32 @test_slt(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_slt(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_SMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -94,16 +91,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv <=s smin(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv <=s smin(inv_1, inv_2) and hoist it out of loop.
define i32 @test_sle(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sle(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sle i32 [[IV]], [[INVARIANT_SMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -125,16 +121,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv >u umax(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv >u umax(inv_1, inv_2) and hoist it out of loop.
define i32 @test_ugt(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ugt(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ugt i32 [[IV]], [[INVARIANT_UMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -156,16 +151,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv >=u umax(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv >=u umax(inv_1, inv_2) and hoist it out of loop.
define i32 @test_uge(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_uge(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp uge i32 [[IV]], [[INVARIANT_UMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -187,16 +181,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv >s smax(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv >s smax(inv_1, inv_2) and hoist it out of loop.
define i32 @test_sgt(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sgt(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_SMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -218,16 +211,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv >=s smax(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv >=s smax(inv_1, inv_2) and hoist it out of loop.
define i32 @test_sge(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sge(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sge i32 [[IV]], [[INVARIANT_SMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -249,16 +241,15 @@ exit:
ret i32 %iv
}
-; TODO: Turn OR to AND and handle accordingly.
+; Turn OR to AND and handle accordingly.
define i32 @test_ult_inv(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ult_inv(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_UMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -280,16 +271,15 @@ exit:
ret i32 %iv
}
-; TODO: Turn OR to AND and handle accordingly.
+; Turn OR to AND and handle accordingly.
define i32 @test_ule_inv(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ule_inv(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ule i32 [[IV]], [[INVARIANT_UMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -311,16 +301,15 @@ exit:
ret i32 %iv
}
-; TODO: Turn OR to AND and handle accordingly.
+; Turn OR to AND and handle accordingly.
define i32 @test_slt_inv(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_slt_inv(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_SMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -342,16 +331,15 @@ exit:
ret i32 %iv
}
-; TODO: Turn OR to AND and handle accordingly.
+; Turn OR to AND and handle accordingly.
define i32 @test_sle_inv(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sle_inv(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sle i32 [[IV]], [[INVARIANT_SMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -373,16 +361,15 @@ exit:
ret i32 %iv
}
-; TODO: Turn OR to AND and handle accordingly.
+; Turn OR to AND and handle accordingly.
define i32 @test_ugt_inv(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ugt_inv(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ugt i32 [[IV]], [[INVARIANT_UMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -404,16 +391,15 @@ exit:
ret i32 %iv
}
-; TODO: Turn OR to AND and handle accordingly.
+; Turn OR to AND and handle accordingly.
define i32 @test_uge_inv(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_uge_inv(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp uge i32 [[IV]], [[INVARIANT_UMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -435,16 +421,15 @@ exit:
ret i32 %iv
}
-; TODO: Turn OR to AND and handle accordingly.
+; Turn OR to AND and handle accordingly.
define i32 @test_sgt_inv(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sgt_inv(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_SMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -466,16 +451,15 @@ exit:
ret i32 %iv
}
-; TODO: Turn OR to AND and handle accordingly.
+; Turn OR to AND and handle accordingly.
define i32 @test_sge_inv(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sge_inv(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = or i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sge i32 [[IV]], [[INVARIANT_SMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -497,16 +481,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv <u umin(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv <u umin(inv_1, inv_2) and hoist it out of loop.
define i32 @test_ult_swapped(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ult_swapped(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[INV_1:%.*]], [[IV]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i32 [[INV_2:%.*]], [[IV]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_UMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -528,16 +511,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv <=u umin(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv <=u umin(inv_1, inv_2) and hoist it out of loop.
define i32 @test_ule_swapped(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ule_swapped(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i32 [[INV_1:%.*]], [[IV]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i32 [[INV_2:%.*]], [[IV]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ule i32 [[IV]], [[INVARIANT_UMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -559,16 +541,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv <s smin(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv <s smin(inv_1, inv_2) and hoist it out of loop.
define i32 @test_slt_swapped(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_slt_swapped(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[INV_1:%.*]], [[IV]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[INV_2:%.*]], [[IV]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp slt i32 [[IV]], [[INVARIANT_SMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -590,16 +571,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv <=s smin(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv <=s smin(inv_1, inv_2) and hoist it out of loop.
define i32 @test_sle_swapped(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sle_swapped(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i32 [[INV_1:%.*]], [[IV]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i32 [[INV_2:%.*]], [[IV]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sle i32 [[IV]], [[INVARIANT_SMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -621,16 +601,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv >u umax(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv >u umax(inv_1, inv_2) and hoist it out of loop.
define i32 @test_ugt_swapped(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ugt_swapped(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[INV_1:%.*]], [[IV]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[INV_2:%.*]], [[IV]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ugt i32 [[IV]], [[INVARIANT_UMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -652,16 +631,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv >=u umax(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv >=u umax(inv_1, inv_2) and hoist it out of loop.
define i32 @test_uge_swapped(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_uge_swapped(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMAX:%.*]] = call i32 @llvm.umax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i32 [[INV_1:%.*]], [[IV]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i32 [[INV_2:%.*]], [[IV]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp uge i32 [[IV]], [[INVARIANT_UMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -683,16 +661,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv >s smax(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv >s smax(inv_1, inv_2) and hoist it out of loop.
define i32 @test_sgt_swapped(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sgt_swapped(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[INV_1:%.*]], [[IV]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[INV_2:%.*]], [[IV]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sgt i32 [[IV]], [[INVARIANT_SMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -714,16 +691,15 @@ exit:
ret i32 %iv
}
-; TODO: turn to %iv >=s smax(inv_1, inv_2) and hoist it out of loop.
+; turn to %iv >=s smax(inv_1, inv_2) and hoist it out of loop.
define i32 @test_sge_swapped(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_sge_swapped(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i32 [[INV_1:%.*]], [[IV]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i32 [[INV_2:%.*]], [[IV]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp sge i32 [[IV]], [[INVARIANT_SMAX]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
@@ -1128,16 +1104,15 @@ exit:
ret i32 %iv
}
-; TODO: This can be optimized, despite the fact that loop_cond has other uses.
+; This can be optimized, despite the fact that loop_cond has other uses.
define i32 @test_ult_extra_use_result_pos(i32 %start, i32 %inv_1, i32 %inv_2) {
; CHECK-LABEL: @test_ult_extra_use_result_pos(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[INVARIANT_UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[INV_1:%.*]], i32 [[INV_2:%.*]])
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[START:%.*]], [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
-; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[IV]], [[INV_1:%.*]]
-; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[IV]], [[INV_2:%.*]]
-; CHECK-NEXT: [[LOOP_COND:%.*]] = and i1 [[CMP_1]], [[CMP_2]]
+; CHECK-NEXT: [[LOOP_COND:%.*]] = icmp ult i32 [[IV]], [[INVARIANT_UMIN]]
; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1
; CHECK-NEXT: br i1 [[LOOP_COND]], label [[LOOP]], label [[EXIT:%.*]]
; CHECK: exit:
More information about the llvm-commits
mailing list