[llvm] 90081f3 - Revert "[Codegenprepare][X86] Use usub with overflow opt for IV increment"
Max Kazantsev via llvm-commits
llvm-commits at lists.llvm.org
Thu Feb 11 02:52:33 PST 2021
Author: Max Kazantsev
Date: 2021-02-11T17:52:11+07:00
New Revision: 90081f3020e38727eb30506d052cbb4e3a489eb6
URL: https://github.com/llvm/llvm-project/commit/90081f3020e38727eb30506d052cbb4e3a489eb6
DIFF: https://github.com/llvm/llvm-project/commit/90081f3020e38727eb30506d052cbb4e3a489eb6.diff
LOG: Revert "[Codegenprepare][X86] Use usub with overflow opt for IV increment"
This reverts commit 3d15b7e7dfc3e2cefc47791d1e8d95909e937842.
We've found an internal failure, need to analyze.
Added:
Modified:
llvm/lib/CodeGen/CodeGenPrepare.cpp
llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
llvm/test/CodeGen/X86/usub_inc_iv.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/CodeGenPrepare.cpp b/llvm/lib/CodeGen/CodeGenPrepare.cpp
index d62e1c119fe2..2b6dbde3ac58 100644
--- a/llvm/lib/CodeGen/CodeGenPrepare.cpp
+++ b/llvm/lib/CodeGen/CodeGenPrepare.cpp
@@ -1284,29 +1284,7 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
Value *Arg0, Value *Arg1,
CmpInst *Cmp,
Intrinsic::ID IID) {
- auto isIVIncrement = [this, &Cmp](BinaryOperator *BO) {
- auto *PN = dyn_cast<PHINode>(BO->getOperand(0));
- if (!PN)
- return false;
- const Loop *L = LI->getLoopFor(BO->getParent());
- if (!L || L->getHeader() != PN->getParent() || !L->getLoopLatch())
- return false;
- if (PN->getIncomingValueForBlock(L->getLoopLatch()) != BO)
- return false;
- if (auto *Step = dyn_cast<Instruction>(BO->getOperand(1)))
- if (L->contains(Step->getParent()))
- return false;
- // IV increment may have other users than the IV. We do not want to make
- // dominance queries to analyze the legality of moving it towards the cmp,
- // so just check that there is no other users.
- if (!BO->hasOneUse())
- return false;
- // Do not risk on moving increment into a child loop.
- if (LI->getLoopFor(Cmp->getParent()) != L)
- return false;
- return true;
- };
- if (BO->getParent() != Cmp->getParent() && !isIVIncrement(BO)) {
+ if (BO->getParent() != Cmp->getParent()) {
// We used to use a dominator tree here to allow multi-block optimization.
// But that was problematic because:
// 1. It could cause a perf regression by hoisting the math op into the
@@ -1317,16 +1295,9 @@ bool CodeGenPrepare::replaceMathCmpWithIntrinsic(BinaryOperator *BO,
// This is because we recompute the DT on every change in the main CGP
// run-loop. The recomputing is probably unnecessary in many cases, so if
// that was fixed, using a DT here would be ok.
- //
- // There is one important particular case we still want to handle: if BO is
- // the IV increment. Important properties that make it profitable:
- // - We can speculate IV increment anywhere in the loop (as long as the
- // indvar Phi is its only user);
- // - Upon computing Cmp, we effectively compute something equivalent to the
- // IV increment (despite it loops differently in the IR). So moving it up
- // to the cmp point does not really increase register pressure.
return false;
}
+
// We allow matching the canonical IR (add X, C) back to (usubo X, -C).
if (BO->getOpcode() == Instruction::Add &&
IID == Intrinsic::usub_with_overflow) {
diff --git a/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll b/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
index c59f4a96464d..fc0d0ae36720 100644
--- a/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
+++ b/llvm/test/CodeGen/X86/2020_12_02_decrementing_loop.ll
@@ -89,16 +89,15 @@ failure: ; preds = %backedge
define i32 @test_02(i32* %p, i64 %len, i32 %x) {
; CHECK-LABEL: test_02:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB2_1: ## %loop
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subq $1, %rax
-; CHECK-NEXT: jb LBB2_4
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: je LBB2_4
; CHECK-NEXT: ## %bb.2: ## %backedge
; CHECK-NEXT: ## in Loop: Header=BB2_1 Depth=1
; CHECK-NEXT: cmpl %edx, -4(%rdi,%rsi,4)
-; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: leaq -1(%rsi), %rsi
; CHECK-NEXT: jne LBB2_1
; CHECK-NEXT: ## %bb.3: ## %failure
; CHECK-NEXT: ud2
@@ -133,16 +132,15 @@ failure: ; preds = %backedge
define i32 @test_03(i32* %p, i64 %len, i32 %x) {
; CHECK-LABEL: test_03:
; CHECK: ## %bb.0: ## %entry
-; CHECK-NEXT: movq %rsi, %rax
; CHECK-NEXT: .p2align 4, 0x90
; CHECK-NEXT: LBB3_1: ## %loop
; CHECK-NEXT: ## =>This Inner Loop Header: Depth=1
-; CHECK-NEXT: subq $1, %rax
-; CHECK-NEXT: jb LBB3_4
+; CHECK-NEXT: testq %rsi, %rsi
+; CHECK-NEXT: je LBB3_4
; CHECK-NEXT: ## %bb.2: ## %backedge
; CHECK-NEXT: ## in Loop: Header=BB3_1 Depth=1
; CHECK-NEXT: cmpl %edx, -4(%rdi,%rsi,4)
-; CHECK-NEXT: movq %rax, %rsi
+; CHECK-NEXT: leaq -1(%rsi), %rsi
; CHECK-NEXT: jne LBB3_1
; CHECK-NEXT: ## %bb.3: ## %failure
; CHECK-NEXT: ud2
diff --git a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
index cb37086459dd..6472c62fc57e 100644
--- a/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
+++ b/llvm/test/CodeGen/X86/lsr-loop-exit-cond.ll
@@ -16,11 +16,11 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; GENERIC-NEXT: movl (%rdx), %eax
; GENERIC-NEXT: movl 4(%rdx), %ebx
; GENERIC-NEXT: decl %ecx
-; GENERIC-NEXT: leaq 20(%rdx), %r11
+; GENERIC-NEXT: leaq 20(%rdx), %r14
; GENERIC-NEXT: movq _Te0@{{.*}}(%rip), %r9
; GENERIC-NEXT: movq _Te1@{{.*}}(%rip), %r8
; GENERIC-NEXT: movq _Te3@{{.*}}(%rip), %r10
-; GENERIC-NEXT: movq %rcx, %r14
+; GENERIC-NEXT: movq %rcx, %r11
; GENERIC-NEXT: .p2align 4, 0x90
; GENERIC-NEXT: LBB0_1: ## %bb
; GENERIC-NEXT: ## =>This Inner Loop Header: Depth=1
@@ -32,29 +32,30 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; GENERIC-NEXT: movzbl %bpl, %ebp
; GENERIC-NEXT: movl (%r8,%rbp,4), %ebp
; GENERIC-NEXT: xorl (%r9,%rax,4), %ebp
-; GENERIC-NEXT: xorl -12(%r11), %ebp
+; GENERIC-NEXT: xorl -12(%r14), %ebp
; GENERIC-NEXT: shrl $24, %ebx
; GENERIC-NEXT: movl (%r10,%rdi,4), %edi
; GENERIC-NEXT: xorl (%r9,%rbx,4), %edi
-; GENERIC-NEXT: xorl -8(%r11), %edi
+; GENERIC-NEXT: xorl -8(%r14), %edi
; GENERIC-NEXT: movl %ebp, %eax
; GENERIC-NEXT: shrl $24, %eax
; GENERIC-NEXT: movl (%r9,%rax,4), %eax
-; GENERIC-NEXT: subq $1, %r14
-; GENERIC-NEXT: jb LBB0_3
+; GENERIC-NEXT: testq %r11, %r11
+; GENERIC-NEXT: je LBB0_3
; GENERIC-NEXT: ## %bb.2: ## %bb1
; GENERIC-NEXT: ## in Loop: Header=BB0_1 Depth=1
; GENERIC-NEXT: movl %edi, %ebx
; GENERIC-NEXT: shrl $16, %ebx
; GENERIC-NEXT: movzbl %bl, %ebx
; GENERIC-NEXT: xorl (%r8,%rbx,4), %eax
-; GENERIC-NEXT: xorl -4(%r11), %eax
+; GENERIC-NEXT: xorl -4(%r14), %eax
; GENERIC-NEXT: shrl $24, %edi
; GENERIC-NEXT: movzbl %bpl, %ebx
; GENERIC-NEXT: movl (%r10,%rbx,4), %ebx
; GENERIC-NEXT: xorl (%r9,%rdi,4), %ebx
-; GENERIC-NEXT: xorl (%r11), %ebx
-; GENERIC-NEXT: addq $16, %r11
+; GENERIC-NEXT: xorl (%r14), %ebx
+; GENERIC-NEXT: decq %r11
+; GENERIC-NEXT: addq $16, %r14
; GENERIC-NEXT: jmp LBB0_1
; GENERIC-NEXT: LBB0_3: ## %bb2
; GENERIC-NEXT: shlq $4, %rcx
@@ -98,12 +99,12 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; ATOM-NEXT: ## kill: def $ecx killed $ecx def $rcx
; ATOM-NEXT: movl (%rdx), %r15d
; ATOM-NEXT: movl 4(%rdx), %eax
-; ATOM-NEXT: leaq 20(%rdx), %r11
+; ATOM-NEXT: leaq 20(%rdx), %r14
; ATOM-NEXT: movq _Te0@{{.*}}(%rip), %r9
; ATOM-NEXT: movq _Te1@{{.*}}(%rip), %r8
; ATOM-NEXT: movq _Te3@{{.*}}(%rip), %r10
; ATOM-NEXT: decl %ecx
-; ATOM-NEXT: movq %rcx, %r14
+; ATOM-NEXT: movq %rcx, %r11
; ATOM-NEXT: .p2align 4, 0x90
; ATOM-NEXT: LBB0_1: ## %bb
; ATOM-NEXT: ## =>This Inner Loop Header: Depth=1
@@ -117,27 +118,28 @@ define void @t(i8* nocapture %in, i8* nocapture %out, i32* nocapture %rk, i32 %r
; ATOM-NEXT: movzbl %r15b, %edi
; ATOM-NEXT: xorl (%r9,%rbp,4), %ebx
; ATOM-NEXT: movl (%r10,%rdi,4), %edi
-; ATOM-NEXT: xorl -12(%r11), %ebx
+; ATOM-NEXT: xorl -12(%r14), %ebx
; ATOM-NEXT: xorl (%r9,%rax,4), %edi
; ATOM-NEXT: movl %ebx, %eax
-; ATOM-NEXT: xorl -8(%r11), %edi
+; ATOM-NEXT: xorl -8(%r14), %edi
; ATOM-NEXT: shrl $24, %eax
; ATOM-NEXT: movl (%r9,%rax,4), %r15d
-; ATOM-NEXT: subq $1, %r14
+; ATOM-NEXT: testq %r11, %r11
; ATOM-NEXT: movl %edi, %eax
-; ATOM-NEXT: jb LBB0_3
+; ATOM-NEXT: je LBB0_3
; ATOM-NEXT: ## %bb.2: ## %bb1
; ATOM-NEXT: ## in Loop: Header=BB0_1 Depth=1
; ATOM-NEXT: shrl $16, %eax
; ATOM-NEXT: shrl $24, %edi
-; ATOM-NEXT: movzbl %al, %eax
-; ATOM-NEXT: xorl (%r8,%rax,4), %r15d
+; ATOM-NEXT: decq %r11
+; ATOM-NEXT: movzbl %al, %ebp
; ATOM-NEXT: movzbl %bl, %eax
; ATOM-NEXT: movl (%r10,%rax,4), %eax
-; ATOM-NEXT: xorl -4(%r11), %r15d
+; ATOM-NEXT: xorl (%r8,%rbp,4), %r15d
; ATOM-NEXT: xorl (%r9,%rdi,4), %eax
-; ATOM-NEXT: xorl (%r11), %eax
-; ATOM-NEXT: addq $16, %r11
+; ATOM-NEXT: xorl -4(%r14), %r15d
+; ATOM-NEXT: xorl (%r14), %eax
+; ATOM-NEXT: addq $16, %r14
; ATOM-NEXT: jmp LBB0_1
; ATOM-NEXT: LBB0_3: ## %bb2
; ATOM-NEXT: shrl $16, %eax
diff --git a/llvm/test/CodeGen/X86/usub_inc_iv.ll b/llvm/test/CodeGen/X86/usub_inc_iv.ll
index 80f367a53479..7bf8b5e8b147 100644
--- a/llvm/test/CodeGen/X86/usub_inc_iv.ll
+++ b/llvm/test/CodeGen/X86/usub_inc_iv.ll
@@ -102,19 +102,18 @@ define i32 @test_02(i32* %p, i64 %len, i32 %x) {
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
-; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[MATH:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
-; CHECK-NEXT: [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[IV]], i64 1)
-; CHECK-NEXT: [[MATH]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT: [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
-; CHECK-NEXT: br i1 [[OV]], label [[EXIT:%.*]], label [[BACKEDGE]]
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[BACKEDGE:%.*]] ], [ [[LEN:%.*]], [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[COND_1:%.*]] = icmp eq i64 [[IV]], 0
+; CHECK-NEXT: br i1 [[COND_1]], label [[EXIT:%.*]], label [[BACKEDGE]]
; CHECK: backedge:
; CHECK-NEXT: [[SUNKADDR:%.*]] = mul i64 [[IV]], 4
-; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[P:%.*]] to i8*
-; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[SUNKADDR]]
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to i8*
+; CHECK-NEXT: [[SUNKADDR1:%.*]] = getelementptr i8, i8* [[TMP0]], i64 [[SUNKADDR]]
; CHECK-NEXT: [[SUNKADDR2:%.*]] = getelementptr i8, i8* [[SUNKADDR1]], i64 -4
-; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
-; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP2]] unordered, align 4
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8* [[SUNKADDR2]] to i32*
+; CHECK-NEXT: [[LOADED:%.*]] = load atomic i32, i32* [[TMP1]] unordered, align 4
; CHECK-NEXT: [[COND_2:%.*]] = icmp eq i32 [[LOADED]], [[X:%.*]]
+; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], -1
; CHECK-NEXT: br i1 [[COND_2]], label [[FAILURE:%.*]], label [[LOOP]]
; CHECK: exit:
; CHECK-NEXT: ret i32 -1
More information about the llvm-commits mailing list