[llvm] 731ae69 - Revert "[CodeGenPrepare] Folding `urem` with loop invariant value"
Noah Goldstein via llvm-commits
llvm-commits at lists.llvm.org
Sun Aug 18 20:37:16 PDT 2024
Author: Noah Goldstein
Date: 2024-08-18T20:36:35-07:00
New Revision: 731ae694a3d8f4d39e855c9a82c97d4f170fd48a
URL: https://github.com/llvm/llvm-project/commit/731ae694a3d8f4d39e855c9a82c97d4f170fd48a
DIFF: https://github.com/llvm/llvm-project/commit/731ae694a3d8f4d39e855c9a82c97d4f170fd48a.diff
LOG: Revert "[CodeGenPrepare] Folding `urem` with loop invariant value"
This reverts commit c64ce8bf283120fd145a57d0e61f9697f719139d.
Seems to be causing stage2 failures on buildbots. Reverting while I
investigate.
Added:
Modified:
llvm/lib/CodeGen/CodeGenPrepare.cpp
llvm/test/CodeGen/X86/fold-loop-of-urem.ll
llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index 72db1f4adabf76..48253a613b41d2 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -472,7 +472,6 @@ class CodeGenPrepare {
bool replaceMathCmpWithIntrinsic(BinaryOperator *BO, Value *Arg0, Value *Arg1,
CmpInst *Cmp, Intrinsic::ID IID);
bool optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT);
- bool optimizeURem(Instruction *Rem);
bool combineToUSubWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
bool combineToUAddWithOverflow(CmpInst *Cmp, ModifyDT &ModifiedDT);
void verifyBFIUpdates(Function &F);
@@ -1976,132 +1975,6 @@ static bool foldFCmpToFPClassTest(CmpInst *Cmp, const TargetLowering &TLI,
return true;
}
-static bool isRemOfLoopIncrementWithLoopInvariant(Instruction *Rem,
- const LoopInfo *LI,
- Value *&RemAmtOut,
- PHINode *&LoopIncrPNOut) {
- Value *Incr, *RemAmt;
- // NB: If RemAmt is a power of 2 it *should* have been transformed by now.
- if (!match(Rem, m_URem(m_Value(Incr), m_Value(RemAmt))))
- return false;
-
- // Find out loop increment PHI.
- auto *PN = dyn_cast<PHINode>(Incr);
- if (!PN)
- return false;
-
- // This isn't strictly necessary, what we really need is one increment and any
- // amount of initial values all being the same.
- if (PN->getNumIncomingValues() != 2)
- return false;
-
- // Only trivially analyzable loops.
- Loop *L = LI->getLoopFor(Rem->getParent());
- if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
- return false;
-
- // Only works if the remainder amount is a loop invaraint
- if (!L->isLoopInvariant(RemAmt))
- return false;
-
- // Is the PHI a loop increment?
- auto LoopIncrInfo = getIVIncrement(PN, LI);
- if (!LoopIncrInfo)
- return false;
-
- // getIVIncrement finds the loop at PN->getParent(). This might be a different
- // loop from the loop with Rem->getParent().
- if (L->getHeader() != PN->getParent())
- return false;
-
- // We need remainder_amount % increment_amount to be zero. Increment of one
- // satisfies that without any special logic and is overwhelmingly the common
- // case.
- if (!match(LoopIncrInfo->second, m_One()))
- return false;
-
- // Need the increment to not overflow.
- if (!match(LoopIncrInfo->first, m_NUWAdd(m_Value(), m_Value())))
- return false;
-
- // Set output variables.
- RemAmtOut = RemAmt;
- LoopIncrPNOut = PN;
-
- return true;
-}
-
-// Try to transform:
-//
-// for(i = Start; i < End; ++i)
-// Rem = (i nuw+ IncrLoopInvariant) u% RemAmtLoopInvariant;
-//
-// ->
-//
-// Rem = (Start nuw+ IncrLoopInvariant) % RemAmtLoopInvariant;
-// for(i = Start; i < End; ++i, ++rem)
-// Rem = rem == RemAmtLoopInvariant ? 0 : Rem;
-//
-// Currently only implemented for `IncrLoopInvariant` being zero.
-static bool foldURemOfLoopIncrement(Instruction *Rem, const DataLayout *DL,
- const LoopInfo *LI,
- SmallSet<BasicBlock *, 32> &FreshBBs,
- bool IsHuge) {
- Value *RemAmt;
- PHINode *LoopIncrPN;
- if (!isRemOfLoopIncrementWithLoopInvariant(Rem, LI, RemAmt, LoopIncrPN))
- return false;
-
- // Only non-constant remainder as the extra IV is probably not profitable
- // in that case.
- //
- // Potential TODO(1): `urem` of a const ends up as `mul` + `shift` + `add`. If
- // we can rule out register pressure and ensure this `urem` is executed each
- // iteration, its probably profitable to handle the const case as well.
- //
- // Potential TODO(2): Should we have a check for how "nested" this remainder
- // operation is? The new code runs every iteration so if the remainder is
- // guarded behind unlikely conditions this might not be worth it.
- if (match(RemAmt, m_ImmConstant()))
- return false;
- Loop *L = LI->getLoopFor(Rem->getParent());
-
- Value *Start = LoopIncrPN->getIncomingValueForBlock(L->getLoopPreheader());
-
- // Create new remainder with induction variable.
- Type *Ty = Rem->getType();
- IRBuilder<> Builder(Rem->getContext());
-
- Builder.SetInsertPoint(LoopIncrPN);
- PHINode *NewRem = Builder.CreatePHI(Ty, 2);
-
- Builder.SetInsertPoint(cast<Instruction>(
- LoopIncrPN->getIncomingValueForBlock(L->getLoopLatch())));
- // `(add (urem x, y), 1)` is always nuw.
- Value *RemAdd = Builder.CreateNUWAdd(NewRem, ConstantInt::get(Ty, 1));
- Value *RemCmp = Builder.CreateICmp(ICmpInst::ICMP_EQ, RemAdd, RemAmt);
- Value *RemSel =
- Builder.CreateSelect(RemCmp, Constant::getNullValue(Ty), RemAdd);
-
- NewRem->addIncoming(Start, L->getLoopPreheader());
- NewRem->addIncoming(RemSel, L->getLoopLatch());
-
- // Insert all touched BBs.
- FreshBBs.insert(LoopIncrPN->getParent());
- FreshBBs.insert(L->getLoopLatch());
- FreshBBs.insert(Rem->getParent());
-
- replaceAllUsesWith(Rem, NewRem, FreshBBs, IsHuge);
- Rem->eraseFromParent();
- return true;
-}
-
-bool CodeGenPrepare::optimizeURem(Instruction *Rem) {
- if (foldURemOfLoopIncrement(Rem, DL, LI, FreshBBs, IsHugeFunc))
- return true;
- return false;
-}
-
bool CodeGenPrepare::optimizeCmp(CmpInst *Cmp, ModifyDT &ModifiedDT) {
if (sinkCmpExpression(Cmp, *TLI))
return true;
@@ -8485,10 +8358,6 @@ bool CodeGenPrepare::optimizeInst(Instruction *I, ModifyDT &ModifiedDT) {
if (optimizeCmp(Cmp, ModifiedDT))
return true;
- if (match(I, m_URem(m_Value(), m_Value())))
- if (optimizeURem(I))
- return true;
-
if (LoadInst *LI = dyn_cast<LoadInst>(I)) {
LI->setMetadata(LLVMContext::MD_invariant_group, nullptr);
bool Modified = optimizeLoadExt(LI);
diff --git a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
index b9fe4d7c79c7fc..aad2e0dd7bd248 100644
--- a/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
+++ b/llvm/test/CodeGen/X86/fold-loop-of-urem.ll
@@ -15,31 +15,25 @@ define void @simple_urem_to_sel(i32 %N, i32 %rem_amt) nounwind {
; CHECK-NEXT: je .LBB0_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: movl %edi, %ebp
-; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: xorl %r14d, %r14d
-; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB0_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: movl %r14d, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: divl %ebx
+; CHECK-NEXT: movl %edx, %edi
; CHECK-NEXT: callq use.i32@PLT
; CHECK-NEXT: incl %r14d
-; CHECK-NEXT: cmpl %ebx, %r14d
-; CHECK-NEXT: cmovel %r15d, %r14d
-; CHECK-NEXT: incl %r12d
-; CHECK-NEXT: cmpl %r12d, %ebp
+; CHECK-NEXT: cmpl %r14d, %ebp
; CHECK-NEXT: jne .LBB0_2
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .LBB0_4: # %for.cond.cleanup
; CHECK-NEXT: retq
@@ -66,28 +60,24 @@ define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind {
; CHECK-NEXT: je .LBB1_8
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: movl %edi, %ebp
-; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: xorl %r14d, %r14d
-; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: jmp .LBB1_2
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB1_5: # %for.body1
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: movl %r14d, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: divl %ebx
+; CHECK-NEXT: movl %edx, %edi
; CHECK-NEXT: callq use.i32@PLT
; CHECK-NEXT: .LBB1_6: # %for.body.tail
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: incl %r14d
-; CHECK-NEXT: cmpl %ebx, %r14d
-; CHECK-NEXT: cmovel %r15d, %r14d
-; CHECK-NEXT: incl %r12d
-; CHECK-NEXT: cmpl %r12d, %ebp
+; CHECK-NEXT: cmpl %r14d, %ebp
; CHECK-NEXT: je .LBB1_7
; CHECK-NEXT: .LBB1_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
@@ -107,9 +97,7 @@ define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind {
; CHECK-NEXT: jmp .LBB1_6
; CHECK-NEXT: .LBB1_7:
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .LBB1_8: # %for.cond.cleanup
; CHECK-NEXT: retq
@@ -225,36 +213,40 @@ for.body.tail:
define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind {
; CHECK-LABEL: simple_urem_to_sel_vec:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: subq $56, %rsp
-; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: pushq %r14
+; CHECK-NEXT: pushq %rbx
+; CHECK-NEXT: subq $24, %rsp
; CHECK-NEXT: pxor %xmm1, %xmm1
-; CHECK-NEXT: pxor %xmm0, %xmm0
-; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
+; CHECK-NEXT: movq %xmm0, %rbx
+; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3]
+; CHECK-NEXT: movq %xmm0, %r14
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB3_1: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: movdqa %xmm1, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movdqa %xmm1, %xmm0
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: divq %rbx
+; CHECK-NEXT: movq %rdx, %xmm0
+; CHECK-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
+; CHECK-NEXT: movq %xmm1, %rax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: divq %r14
+; CHECK-NEXT: movq %rdx, %xmm1
+; CHECK-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-NEXT: callq use.2xi64@PLT
-; CHECK-NEXT: pcmpeqd %xmm1, %xmm1
-; CHECK-NEXT: movdqa (%rsp), %xmm2 # 16-byte Reload
-; CHECK-NEXT: psubq %xmm1, %xmm2
-; CHECK-NEXT: movdqa %xmm2, %xmm0
-; CHECK-NEXT: movdqa %xmm2, %xmm3
-; CHECK-NEXT: pcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
-; CHECK-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,0,3,2]
-; CHECK-NEXT: pand %xmm0, %xmm2
-; CHECK-NEXT: pandn %xmm3, %xmm2
-; CHECK-NEXT: movdqa %xmm2, (%rsp) # 16-byte Spill
-; CHECK-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
-; CHECK-NEXT: psubq %xmm1, %xmm0
-; CHECK-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
+; CHECK-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload
+; CHECK-NEXT: psubq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
+; CHECK-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill
; CHECK-NEXT: callq get.i1@PLT
; CHECK-NEXT: testb $1, %al
-; CHECK-NEXT: movdqa (%rsp), %xmm1 # 16-byte Reload
; CHECK-NEXT: je .LBB3_1
; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
-; CHECK-NEXT: addq $56, %rsp
+; CHECK-NEXT: addq $24, %rsp
+; CHECK-NEXT: popq %rbx
+; CHECK-NEXT: popq %r14
; CHECK-NEXT: retq
entry:
br label %for.body
@@ -344,33 +336,27 @@ define void @simple_urem_to_sel_second_acc(i32 %N, i32 %rem_amt) nounwind {
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r13
-; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: movl %edi, %ebp
; CHECK-NEXT: movl $1, %r15d
-; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: xorl %r14d, %r14d
-; CHECK-NEXT: xorl %r13d, %r13d
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB5_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: movl %r14d, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: divl %ebx
+; CHECK-NEXT: movl %edx, %edi
; CHECK-NEXT: callq use.i32@PLT
; CHECK-NEXT: incl %r14d
-; CHECK-NEXT: cmpl %ebx, %r14d
-; CHECK-NEXT: cmovel %r12d, %r14d
-; CHECK-NEXT: incl %r13d
; CHECK-NEXT: addl $2, %r15d
; CHECK-NEXT: cmpl %ebp, %r15d
; CHECK-NEXT: jbe .LBB5_2
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
-; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
@@ -690,27 +676,23 @@ define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwin
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r13
-; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: pushq %rax
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: movl %edi, %ebp
; CHECK-NEXT: decl %ebp
-; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: xorl %r14d, %r14d
-; CHECK-NEXT: xorl %r13d, %r13d
; CHECK-NEXT: jmp .LBB12_2
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB12_3: # %for.body.backedge
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
; CHECK-NEXT: incl %r14d
-; CHECK-NEXT: cmpl %ebx, %r14d
-; CHECK-NEXT: cmovel %r12d, %r14d
-; CHECK-NEXT: incl %r13d
; CHECK-NEXT: .LBB12_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: movl %r14d, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: divl %ebx
+; CHECK-NEXT: movl %edx, %edi
; CHECK-NEXT: callq use.i32@PLT
; CHECK-NEXT: callq get.i1@PLT
; CHECK-NEXT: movl %eax, %r15d
@@ -720,13 +702,11 @@ define void @simple_urem_multi_latch_non_canonical(i32 %N, i32 %rem_amt) nounwin
; CHECK-NEXT: # %bb.4: # %for.body0
; CHECK-NEXT: # in Loop: Header=BB12_2 Depth=1
; CHECK-NEXT: callq do_stuff1@PLT
-; CHECK-NEXT: cmpl %r13d, %ebp
+; CHECK-NEXT: cmpl %r14d, %ebp
; CHECK-NEXT: jne .LBB12_3
; CHECK-NEXT: # %bb.5:
; CHECK-NEXT: addq $8, %rsp
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
-; CHECK-NEXT: popq %r13
; CHECK-NEXT: popq %r14
; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
@@ -890,31 +870,25 @@ define void @simple_urem_to_sel_non_zero_start(i32 %N, i32 %rem_amt) nounwind {
; CHECK-NEXT: jb .LBB16_4
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movl %esi, %ebx
; CHECK-NEXT: movl %edi, %ebp
; CHECK-NEXT: movl $2, %r14d
-; CHECK-NEXT: xorl %r15d, %r15d
-; CHECK-NEXT: movl $2, %r12d
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB16_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: movl %r14d, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: divl %ebx
+; CHECK-NEXT: movl %edx, %edi
; CHECK-NEXT: callq use.i32@PLT
; CHECK-NEXT: incl %r14d
-; CHECK-NEXT: cmpl %ebx, %r14d
-; CHECK-NEXT: cmovel %r15d, %r14d
-; CHECK-NEXT: incl %r12d
-; CHECK-NEXT: cmpl %r12d, %ebp
+; CHECK-NEXT: cmpl %r14d, %ebp
; CHECK-NEXT: jne .LBB16_2
; CHECK-NEXT: # %bb.3:
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .LBB16_4: # %for.cond.cleanup
; CHECK-NEXT: retq
@@ -1092,34 +1066,28 @@ define void @simple_urem_to_sel_non_zero_start_through_sub(i32 %N, i32 %rem_amt,
; CHECK-LABEL: simple_urem_to_sel_non_zero_start_through_sub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: pushq %rbp
-; CHECK-NEXT: pushq %r15
; CHECK-NEXT: pushq %r14
-; CHECK-NEXT: pushq %r12
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movl %edi, %ebp
; CHECK-NEXT: subl %edx, %ebp
; CHECK-NEXT: jbe .LBB20_3
; CHECK-NEXT: # %bb.1: # %for.body.preheader
; CHECK-NEXT: movl %esi, %ebx
-; CHECK-NEXT: xorl %r15d, %r15d
; CHECK-NEXT: xorl %r14d, %r14d
-; CHECK-NEXT: xorl %r12d, %r12d
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: .LBB20_2: # %for.body
; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: movl %r14d, %edi
+; CHECK-NEXT: movl %r14d, %eax
+; CHECK-NEXT: xorl %edx, %edx
+; CHECK-NEXT: divl %ebx
+; CHECK-NEXT: movl %edx, %edi
; CHECK-NEXT: callq use.i32@PLT
; CHECK-NEXT: incl %r14d
-; CHECK-NEXT: cmpl %ebx, %r14d
-; CHECK-NEXT: cmovel %r15d, %r14d
-; CHECK-NEXT: incl %r12d
-; CHECK-NEXT: cmpl %r12d, %ebp
+; CHECK-NEXT: cmpl %r14d, %ebp
; CHECK-NEXT: jne .LBB20_2
; CHECK-NEXT: .LBB20_3: # %for.cond.cleanup
; CHECK-NEXT: popq %rbx
-; CHECK-NEXT: popq %r12
; CHECK-NEXT: popq %r14
-; CHECK-NEXT: popq %r15
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: retq
entry:
diff --git a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
index 304ae337ed4197..a019679e65905d 100644
--- a/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
+++ b/llvm/test/Transforms/CodeGenPrepare/X86/fold-loop-of-urem.ll
@@ -19,12 +19,9 @@ define void @simple_urem_to_sel(i32 %N, i32 %rem_amt) nounwind {
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
; CHECK-NEXT: tail call void @use.i32(i32 [[REM]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]]
-; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
@@ -56,8 +53,7 @@ define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind {
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY_TAIL:.*]] ]
-; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY_TAIL]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY_TAIL:.*]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[COND0:%.*]] = call i1 @get.i1()
; CHECK-NEXT: br i1 [[COND0]], label %[[FOR_BODY0:.*]], label %[[FOR_BODY_TAIL]]
; CHECK: [[FOR_BODY0]]:
@@ -67,12 +63,10 @@ define void @simple_urem_to_sel_nested2(i32 %N, i32 %rem_amt) nounwind {
; CHECK-NEXT: [[COND2:%.*]] = call i1 @get.i1()
; CHECK-NEXT: br i1 [[COND2]], label %[[FOR_BODY1]], label %[[FOR_BODY_TAIL]]
; CHECK: [[FOR_BODY1]]:
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
; CHECK-NEXT: tail call void @use.i32(i32 [[REM]])
; CHECK-NEXT: br label %[[FOR_BODY_TAIL]]
; CHECK: [[FOR_BODY_TAIL]]:
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]]
-; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
@@ -170,12 +164,9 @@ define void @simple_urem_to_sel_vec(<2 x i64> %rem_amt) nounwind {
; CHECK: [[FOR_COND_CLEANUP:.*]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[REM:%.*]] = phi <2 x i64> [ zeroinitializer, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_04:%.*]] = phi <2 x i64> [ [[INC:%.*]], %[[FOR_BODY]] ], [ zeroinitializer, %[[ENTRY]] ]
+; CHECK-NEXT: [[REM:%.*]] = urem <2 x i64> [[I_04]], [[REM_AMT]]
; CHECK-NEXT: tail call void @use.2xi64(<2 x i64> [[REM]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw <2 x i64> [[REM]], <i64 1, i64 1>
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <2 x i64> [[TMP1]], [[REM_AMT]]
-; CHECK-NEXT: [[TMP3]] = select <2 x i1> [[TMP2]], <2 x i64> zeroinitializer, <2 x i64> [[TMP1]]
; CHECK-NEXT: [[INC]] = add nuw <2 x i64> [[I_04]], <i64 1, i64 1>
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = call i1 @get.i1()
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
@@ -255,13 +246,10 @@ define void @simple_urem_to_sel_second_acc(i32 %N, i32 %rem_amt) nounwind {
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 0, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 0, %[[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT: [[I_05:%.*]] = phi i32 [ [[INC2:%.*]], %[[FOR_BODY]] ], [ 1, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
; CHECK-NEXT: tail call void @use.i32(i32 [[REM]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]]
-; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1
; CHECK-NEXT: [[INC2]] = add nuw i32 [[I_05]], 2
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp ugt i32 [[INC2]], [[N]]
@@ -398,12 +386,9 @@ define void @simple_urem_non_zero_entry4(i32 %N, i32 %rem_amt) nounwind {
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 4, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 4, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
; CHECK-NEXT: tail call void @use.i32(i32 [[REM]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]]
-; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
@@ -472,12 +457,9 @@ define void @simple_urem_fail_no_preheader_non_canonical(i32 %N, i32 %rem_amt) n
; CHECK-NEXT: [[I_04_PH:%.*]] = phi i32 [ 1, %[[FOR_BODY1]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[REM:%.*]] = phi i32 [ [[I_04_PH]], %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ [[I_04_PH]], %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
; CHECK-NEXT: tail call void @use.i32(i32 [[REM]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]]
-; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
@@ -668,12 +650,9 @@ define void @simple_urem_to_sel_non_zero_start(i32 %N, i32 %rem_amt) nounwind {
; CHECK: [[FOR_COND_CLEANUP]]:
; CHECK-NEXT: ret void
; CHECK: [[FOR_BODY]]:
-; CHECK-NEXT: [[REM:%.*]] = phi i32 [ 2, %[[FOR_BODY_PREHEADER]] ], [ [[TMP3:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[I_04:%.*]] = phi i32 [ [[INC:%.*]], %[[FOR_BODY]] ], [ 2, %[[FOR_BODY_PREHEADER]] ]
+; CHECK-NEXT: [[REM:%.*]] = urem i32 [[I_04]], [[REM_AMT]]
; CHECK-NEXT: tail call void @use.i32(i32 [[REM]])
-; CHECK-NEXT: [[TMP1:%.*]] = add nuw i32 [[REM]], 1
-; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], [[REM_AMT]]
-; CHECK-NEXT: [[TMP3]] = select i1 [[TMP2]], i32 0, i32 [[TMP1]]
; CHECK-NEXT: [[INC]] = add nuw i32 [[I_04]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i32 [[INC]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[FOR_COND_CLEANUP]], label %[[FOR_BODY]]
More information about the llvm-commits
mailing list