[llvm] r356665 - [CodeGenPrepare] limit formation of overflow intrinsics (PR41129)

Sanjay Patel via llvm-commits llvm-commits at lists.llvm.org
Thu Mar 21 06:57:07 PDT 2019


Author: spatel
Date: Thu Mar 21 06:57:07 2019
New Revision: 356665

URL: http://llvm.org/viewvc/llvm-project?rev=356665&view=rev
Log:
[CodeGenPrepare] limit formation of overflow intrinsics (PR41129)

This is probably a bigger limitation than necessary, but since we don't have any evidence yet
that this transform led to real-world perf improvements rather than regressions, I'm making a
quick, blunt fix.

In the motivating x86 example from:
https://bugs.llvm.org/show_bug.cgi?id=41129
...and shown in the regression test, we want to avoid an extra instruction in the dominating
block because that could be costly.

The x86 LSR test diff is reversing the changes from D57789. There's no evidence that one version
is any better than the other yet.

Differential Revision: https://reviews.llvm.org/D59602

Modified:
    llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
    llvm/trunk/test/CodeGen/X86/cgp-usubo.ll
    llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
    llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll

Modified: llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp?rev=356665&r1=356664&r2=356665&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp (original)
+++ llvm/trunk/lib/CodeGen/CodeGenPrepare.cpp Thu Mar 21 06:57:07 2019
@@ -1181,10 +1181,14 @@ static bool replaceMathCmpWithIntrinsic(
     if (!MathDominates && !DT.dominates(Cmp, BO))
       return false;
 
-    // Check that the insertion doesn't create a value that is live across more
-    // than two blocks, so to minimise the increase in register pressure.
     BasicBlock *MathBB = BO->getParent(), *CmpBB = Cmp->getParent();
     if (MathBB != CmpBB) {
+      // Avoid hoisting an extra op into a dominating block and creating a
+      // potentially longer critical path.
+      if (!MathDominates)
+        return false;
+      // Check that the insertion doesn't create a value that is live across
+      // more than two blocks, so to minimise the increase in register pressure.
       BasicBlock *Dominator = MathDominates ? MathBB : CmpBB;
       BasicBlock *Dominated = MathDominates ? CmpBB : MathBB;
       auto Successors = successors(Dominator);

Modified: llvm/trunk/test/CodeGen/X86/cgp-usubo.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/cgp-usubo.ll?rev=356665&r1=356664&r2=356665&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/cgp-usubo.ll (original)
+++ llvm/trunk/test/CodeGen/X86/cgp-usubo.ll Thu Mar 21 06:57:07 2019
@@ -211,16 +211,16 @@ define void @PR41129(i64* %p64) {
 ; CHECK-LABEL: PR41129:
 ; CHECK:       # %bb.0: # %entry
 ; CHECK-NEXT:    movq (%rdi), %rax
-; CHECK-NEXT:    movq %rax, %rcx
-; CHECK-NEXT:    subq $1, %rcx
-; CHECK-NEXT:    jae .LBB10_1
-; CHECK-NEXT:  # %bb.2: # %true
-; CHECK-NEXT:    movq %rcx, (%rdi)
-; CHECK-NEXT:    retq
-; CHECK-NEXT:  .LBB10_1: # %false
+; CHECK-NEXT:    testq %rax, %rax
+; CHECK-NEXT:    je .LBB10_2
+; CHECK-NEXT:  # %bb.1: # %false
 ; CHECK-NEXT:    andl $7, %eax
 ; CHECK-NEXT:    movq %rax, (%rdi)
 ; CHECK-NEXT:    retq
+; CHECK-NEXT:  .LBB10_2: # %true
+; CHECK-NEXT:    decq %rax
+; CHECK-NEXT:    movq %rax, (%rdi)
+; CHECK-NEXT:    retq
 entry:
   %key = load i64, i64* %p64, align 8
   %cond17 = icmp eq i64 %key, 0

Modified: llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll?rev=356665&r1=356664&r2=356665&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll (original)
+++ llvm/trunk/test/CodeGen/X86/lsr-loop-exit-cond.ll Thu Mar 21 06:57:07 2019
@@ -16,11 +16,11 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    movl (%rdx), %eax
 ; GENERIC-NEXT:    movl 4(%rdx), %ebx
 ; GENERIC-NEXT:    decl %ecx
-; GENERIC-NEXT:    leaq 20(%rdx), %r11
+; GENERIC-NEXT:    leaq 20(%rdx), %r14
 ; GENERIC-NEXT:    movq _Te0@{{.*}}(%rip), %r9
 ; GENERIC-NEXT:    movq _Te1@{{.*}}(%rip), %r8
 ; GENERIC-NEXT:    movq _Te3@{{.*}}(%rip), %r10
-; GENERIC-NEXT:    movq %rcx, %r14
+; GENERIC-NEXT:    movq %rcx, %r11
 ; GENERIC-NEXT:    jmp LBB0_1
 ; GENERIC-NEXT:    .p2align 4, 0x90
 ; GENERIC-NEXT:  LBB0_2: ## %bb1
@@ -29,13 +29,14 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    shrl $16, %ebx
 ; GENERIC-NEXT:    movzbl %bl, %ebx
 ; GENERIC-NEXT:    xorl (%r8,%rbx,4), %eax
-; GENERIC-NEXT:    xorl -4(%r11), %eax
+; GENERIC-NEXT:    xorl -4(%r14), %eax
 ; GENERIC-NEXT:    shrl $24, %edi
 ; GENERIC-NEXT:    movzbl %bpl, %ebx
 ; GENERIC-NEXT:    movl (%r10,%rbx,4), %ebx
 ; GENERIC-NEXT:    xorl (%r9,%rdi,4), %ebx
-; GENERIC-NEXT:    xorl (%r11), %ebx
-; GENERIC-NEXT:    addq $16, %r11
+; GENERIC-NEXT:    xorl (%r14), %ebx
+; GENERIC-NEXT:    decq %r11
+; GENERIC-NEXT:    addq $16, %r14
 ; GENERIC-NEXT:  LBB0_1: ## %bb
 ; GENERIC-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; GENERIC-NEXT:    movzbl %al, %edi
@@ -46,16 +47,16 @@ define void @t(i8* nocapture %in, i8* no
 ; GENERIC-NEXT:    movzbl %bpl, %ebp
 ; GENERIC-NEXT:    movl (%r8,%rbp,4), %ebp
 ; GENERIC-NEXT:    xorl (%r9,%rax,4), %ebp
-; GENERIC-NEXT:    xorl -12(%r11), %ebp
+; GENERIC-NEXT:    xorl -12(%r14), %ebp
 ; GENERIC-NEXT:    shrl $24, %ebx
 ; GENERIC-NEXT:    movl (%r10,%rdi,4), %edi
 ; GENERIC-NEXT:    xorl (%r9,%rbx,4), %edi
-; GENERIC-NEXT:    xorl -8(%r11), %edi
+; GENERIC-NEXT:    xorl -8(%r14), %edi
 ; GENERIC-NEXT:    movl %ebp, %eax
 ; GENERIC-NEXT:    shrl $24, %eax
 ; GENERIC-NEXT:    movl (%r9,%rax,4), %eax
-; GENERIC-NEXT:    subq $1, %r14
-; GENERIC-NEXT:    jae LBB0_2
+; GENERIC-NEXT:    testq %r11, %r11
+; GENERIC-NEXT:    jne LBB0_2
 ; GENERIC-NEXT:  ## %bb.3: ## %bb2
 ; GENERIC-NEXT:    shlq $4, %rcx
 ; GENERIC-NEXT:    andl $-16777216, %eax ## imm = 0xFF000000
@@ -98,26 +99,27 @@ define void @t(i8* nocapture %in, i8* no
 ; ATOM-NEXT:    ## kill: def $ecx killed $ecx def $rcx
 ; ATOM-NEXT:    movl (%rdx), %r15d
 ; ATOM-NEXT:    movl 4(%rdx), %eax
-; ATOM-NEXT:    leaq 20(%rdx), %r11
+; ATOM-NEXT:    leaq 20(%rdx), %r14
 ; ATOM-NEXT:    movq _Te0@{{.*}}(%rip), %r9
 ; ATOM-NEXT:    movq _Te1@{{.*}}(%rip), %r8
 ; ATOM-NEXT:    movq _Te3@{{.*}}(%rip), %r10
 ; ATOM-NEXT:    decl %ecx
-; ATOM-NEXT:    movq %rcx, %r14
+; ATOM-NEXT:    movq %rcx, %r11
 ; ATOM-NEXT:    jmp LBB0_1
 ; ATOM-NEXT:    .p2align 4, 0x90
 ; ATOM-NEXT:  LBB0_2: ## %bb1
 ; ATOM-NEXT:    ## in Loop: Header=BB0_1 Depth=1
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $24, %edi
-; ATOM-NEXT:    movzbl %al, %eax
-; ATOM-NEXT:    xorl (%r8,%rax,4), %r15d
+; ATOM-NEXT:    decq %r11
+; ATOM-NEXT:    movzbl %al, %ebp
 ; ATOM-NEXT:    movzbl %bl, %eax
 ; ATOM-NEXT:    movl (%r10,%rax,4), %eax
-; ATOM-NEXT:    xorl -4(%r11), %r15d
+; ATOM-NEXT:    xorl (%r8,%rbp,4), %r15d
 ; ATOM-NEXT:    xorl (%r9,%rdi,4), %eax
-; ATOM-NEXT:    xorl (%r11), %eax
-; ATOM-NEXT:    addq $16, %r11
+; ATOM-NEXT:    xorl -4(%r14), %r15d
+; ATOM-NEXT:    xorl (%r14), %eax
+; ATOM-NEXT:    addq $16, %r14
 ; ATOM-NEXT:  LBB0_1: ## %bb
 ; ATOM-NEXT:    ## =>This Inner Loop Header: Depth=1
 ; ATOM-NEXT:    movl %eax, %edi
@@ -130,15 +132,15 @@ define void @t(i8* nocapture %in, i8* no
 ; ATOM-NEXT:    movzbl %r15b, %edi
 ; ATOM-NEXT:    xorl (%r9,%rbp,4), %ebx
 ; ATOM-NEXT:    movl (%r10,%rdi,4), %edi
-; ATOM-NEXT:    xorl -12(%r11), %ebx
+; ATOM-NEXT:    xorl -12(%r14), %ebx
 ; ATOM-NEXT:    xorl (%r9,%rax,4), %edi
 ; ATOM-NEXT:    movl %ebx, %eax
-; ATOM-NEXT:    xorl -8(%r11), %edi
+; ATOM-NEXT:    xorl -8(%r14), %edi
 ; ATOM-NEXT:    shrl $24, %eax
 ; ATOM-NEXT:    movl (%r9,%rax,4), %r15d
-; ATOM-NEXT:    subq $1, %r14
+; ATOM-NEXT:    testq %r11, %r11
 ; ATOM-NEXT:    movl %edi, %eax
-; ATOM-NEXT:    jae LBB0_2
+; ATOM-NEXT:    jne LBB0_2
 ; ATOM-NEXT:  ## %bb.3: ## %bb2
 ; ATOM-NEXT:    shrl $16, %eax
 ; ATOM-NEXT:    shrl $8, %edi

Modified: llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll?rev=356665&r1=356664&r2=356665&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/CodeGenPrepare/X86/overflow-intrinsics.ll Thu Mar 21 06:57:07 2019
@@ -475,23 +475,22 @@ define i64 @foo2(i8 *%p) {
   ret i64 %sub
 }
 
-; When the compare operand has uses besides add/sub,
-; the transform may not be profitable.
+; Avoid hoisting a math op into a dominating block which would
+; increase the critical path.
 
 define void @PR41129(i64* %p64) {
 ; CHECK-LABEL: @PR41129(
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[KEY:%.*]] = load i64, i64* [[P64:%.*]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = call { i64, i1 } @llvm.usub.with.overflow.i64(i64 [[KEY]], i64 1)
-; CHECK-NEXT:    [[MATH:%.*]] = extractvalue { i64, i1 } [[TMP0]], 0
-; CHECK-NEXT:    [[OV:%.*]] = extractvalue { i64, i1 } [[TMP0]], 1
-; CHECK-NEXT:    br i1 [[OV]], label [[TRUE:%.*]], label [[FALSE:%.*]]
+; CHECK-NEXT:    [[COND17:%.*]] = icmp eq i64 [[KEY]], 0
+; CHECK-NEXT:    br i1 [[COND17]], label [[TRUE:%.*]], label [[FALSE:%.*]]
 ; CHECK:       false:
 ; CHECK-NEXT:    [[ANDVAL:%.*]] = and i64 [[KEY]], 7
 ; CHECK-NEXT:    store i64 [[ANDVAL]], i64* [[P64]]
 ; CHECK-NEXT:    br label [[EXIT:%.*]]
 ; CHECK:       true:
-; CHECK-NEXT:    store i64 [[MATH]], i64* [[P64]]
+; CHECK-NEXT:    [[SVALUE:%.*]] = add i64 [[KEY]], -1
+; CHECK-NEXT:    store i64 [[SVALUE]], i64* [[P64]]
 ; CHECK-NEXT:    br label [[EXIT]]
 ; CHECK:       exit:
 ; CHECK-NEXT:    ret void




More information about the llvm-commits mailing list