[llvm] r323571 - [x86] auto-generate complete checks; NFC
Sanjay Patel via llvm-commits
llvm-commits@lists.llvm.org
Fri Jan 26 14:06:07 PST 2018
Author: spatel
Date: Fri Jan 26 14:06:07 2018
New Revision: 323571
URL: http://llvm.org/viewvc/llvm-project?rev=323571&view=rev
Log:
[x86] auto-generate complete checks; NFC
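
For anyone regenerating checks like these locally: the assertions come from the
update scripts named in the NOTE lines added in the diffs below. A rough sketch
of the invocations (the --llc-binary/--opt-binary flags and the build/bin paths
are assumptions about a local build tree, not taken from this commit):

  # llc-driven test: rewrites the CHECK lines in place from the test's RUN lines
  utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
      test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll

  # opt-driven test: same idea, but the checks are generated from opt's IR output
  utils/update_test_checks.py --opt-binary=build/bin/opt \
      test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll

Re-running the script after a codegen change updates the full expected output,
which is why hand-written partial checks are replaced wholesale below.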
Modified:
llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll?rev=323571&r1=323570&r2=323571&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll Fri Jan 26 14:06:07 2018
@@ -1,27 +1,70 @@
-; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O3 -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -mtriple=i686-unknown-unknown -mcpu=core2 | FileCheck %s -check-prefix=X32
; @simple is the most basic chain of address induction variables. Chaining
; saves at least one register and avoids complex addressing and setup
; code.
;
-; X64: @simple
; %x * 4
-; X64: shlq $2
; no other address computation in the preheader
-; X64-NEXT: xorl
-; X64-NEXT: .p2align
-; X64: %loop
; no complex address modes
-; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
;
-; X32: @simple
; no expensive address computation in the preheader
-; X32-NOT: imul
-; X32: %loop
; no complex address modes
-; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+; X64-LABEL: simple:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movslq %edx, %rcx
+; X64-NEXT: shlq $2, %rcx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB0_1: # %loop
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: addl (%rdi), %eax
+; X64-NEXT: leaq (%rdi,%rcx), %r8
+; X64-NEXT: addl (%rdi,%rcx), %eax
+; X64-NEXT: leaq (%r8,%rcx), %rdx
+; X64-NEXT: addl (%rcx,%r8), %eax
+; X64-NEXT: addl (%rcx,%rdx), %eax
+; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: cmpq %rsi, %rdx
+; X64-NEXT: jne .LBB0_1
+; X64-NEXT: # %bb.2: # %exit
+; X64-NEXT: retq
+;
+; X32-LABEL: simple:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: shll $2, %edx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB0_1: # %loop
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: addl (%esi), %eax
+; X32-NEXT: leal (%esi,%edx), %edi
+; X32-NEXT: addl (%esi,%edx), %eax
+; X32-NEXT: leal (%edi,%edx), %ebx
+; X32-NEXT: addl (%edx,%edi), %eax
+; X32-NEXT: addl (%edx,%ebx), %eax
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: movl %ebx, %esi
+; X32-NEXT: cmpl %ecx, %ebx
+; X32-NEXT: jne .LBB0_1
+; X32-NEXT: # %bb.2: # %exit
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
entry:
br label %loop
loop:
@@ -47,23 +90,60 @@ exit:
; @user is not currently chained because the IV is live across memory ops.
;
-; X64: @user
-; X64: shlq $4
-; X64: lea
-; X64: lea
-; X64: %loop
-; complex address modes
-; X64: (%{{[^)]+}},%{{[^)]+}},
-;
-; X32: @user
; expensive address computation in the preheader
-; X32: shll $4
-; X32: lea
-; X32: lea
-; X32: %loop
; complex address modes
-; X32: (%{{[^)]+}},%{{[^)]+}},
define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+; X64-LABEL: user:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movslq %edx, %rcx
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: shlq $4, %rdx
+; X64-NEXT: leaq (,%rcx,4), %rax
+; X64-NEXT: leaq (%rax,%rax,2), %r8
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB1_1: # %loop
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: addl (%rdi), %eax
+; X64-NEXT: addl (%rdi,%rcx,4), %eax
+; X64-NEXT: addl (%rdi,%rcx,8), %eax
+; X64-NEXT: addl (%rdi,%r8), %eax
+; X64-NEXT: movl %eax, (%rdi)
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: cmpq %rdi, %rsi
+; X64-NEXT: jne .LBB1_1
+; X64-NEXT: # %bb.2: # %exit
+; X64-NEXT: retq
+;
+; X32-LABEL: user:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: shll $4, %edi
+; X32-NEXT: leal (,%ecx,4), %eax
+; X32-NEXT: leal (%eax,%eax,2), %ebx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB1_1: # %loop
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: addl (%esi), %eax
+; X32-NEXT: addl (%esi,%ecx,4), %eax
+; X32-NEXT: addl (%esi,%ecx,8), %eax
+; X32-NEXT: addl (%esi,%ebx), %eax
+; X32-NEXT: movl %eax, (%esi)
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: cmpl %esi, %edx
+; X32-NEXT: jne .LBB1_1
+; X32-NEXT: # %bb.2: # %exit
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
entry:
br label %loop
loop:
@@ -93,20 +173,94 @@ exit:
; used to do, and exactly what we don't want to do. LSR's new IV
; chaining feature should now undo the damage.
;
-; X64: extrastride:
; We currently don't handle this on X64 because the sexts cause
; strange increment expressions like this:
; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
;
-; X32: extrastride:
-; no spills in the preheader
-; X32-NOT: mov{{.*}}(%esp){{$}}
-; X32: %for.body{{$}}
-; no complex address modes
-; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
-; no reloads
-; X32-NOT: (%esp)
+; For x32, no spills in the preheader, no complex address modes, no reloads.
+
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+; X64-LABEL: extrastride:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %rbx
+; X64-NEXT: # kill: def %ecx killed %ecx def %rcx
+; X64-NEXT: # kill: def %esi killed %esi def %rsi
+; X64-NEXT: testl %r9d, %r9d
+; X64-NEXT: je .LBB2_3
+; X64-NEXT: # %bb.1: # %for.body.lr.ph
+; X64-NEXT: leal (%rsi,%rsi), %r14d
+; X64-NEXT: leal (%rsi,%rsi,2), %ebx
+; X64-NEXT: addl %esi, %ecx
+; X64-NEXT: leal (,%rsi,4), %eax
+; X64-NEXT: leal (%rcx,%rsi,4), %ebp
+; X64-NEXT: movslq %eax, %r10
+; X64-NEXT: movslq %ebx, %r11
+; X64-NEXT: movslq %r14d, %rbx
+; X64-NEXT: movslq %esi, %rsi
+; X64-NEXT: movslq %r8d, %rcx
+; X64-NEXT: shlq $2, %rcx
+; X64-NEXT: movslq %ebp, %rax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB2_2: # %for.body
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movl (%rdi,%rsi), %ebp
+; X64-NEXT: addl (%rdi), %ebp
+; X64-NEXT: addl (%rdi,%rbx), %ebp
+; X64-NEXT: addl (%rdi,%r11), %ebp
+; X64-NEXT: addl (%rdi,%r10), %ebp
+; X64-NEXT: movl %ebp, (%rdx)
+; X64-NEXT: addq %rax, %rdi
+; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: decl %r9d
+; X64-NEXT: jne .LBB2_2
+; X64-NEXT: .LBB2_3: # %for.end
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+;
+; X32-LABEL: extrastride:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: je .LBB2_3
+; X32-NEXT: # %bb.1: # %for.body.lr.ph
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: addl %esi, %edi
+; X32-NEXT: shll $2, %ecx
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB2_2: # %for.body
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movl (%ebx,%esi), %ebp
+; X32-NEXT: addl (%ebx), %ebp
+; X32-NEXT: leal (%ebx,%esi), %ebx
+; X32-NEXT: addl (%esi,%ebx), %ebp
+; X32-NEXT: leal (%ebx,%esi), %ebx
+; X32-NEXT: addl (%esi,%ebx), %ebp
+; X32-NEXT: leal (%ebx,%esi), %ebx
+; X32-NEXT: addl (%esi,%ebx), %ebp
+; X32-NEXT: movl %ebp, (%edx)
+; X32-NEXT: leal (%ebx,%esi), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: decl %eax
+; X32-NEXT: jne .LBB2_2
+; X32-NEXT: .LBB2_3: # %for.end
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
entry:
%cmp8 = icmp eq i32 %z, 0
br i1 %cmp8, label %for.end, label %for.body.lr.ph
@@ -158,13 +312,71 @@ for.end:
; }
; where 's' can be folded into the addressing mode.
; Consequently, we should *not* form any chains.
-;
-; X64: foldedidx:
-; X64: movzbl -3(
-;
-; X32: foldedidx:
-; X32: movzbl 400(
+
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+; X64-LABEL: foldedidx:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl $3, %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB3_1: # %for.body
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movzbl -3(%rdi,%rax), %r8d
+; X64-NEXT: movzbl -3(%rsi,%rax), %ecx
+; X64-NEXT: addl %r8d, %ecx
+; X64-NEXT: movb %cl, -3(%rdx,%rax)
+; X64-NEXT: movzbl -2(%rdi,%rax), %r8d
+; X64-NEXT: movzbl -2(%rsi,%rax), %ecx
+; X64-NEXT: addl %r8d, %ecx
+; X64-NEXT: movb %cl, -2(%rdx,%rax)
+; X64-NEXT: movzbl -1(%rdi,%rax), %r8d
+; X64-NEXT: movzbl -1(%rsi,%rax), %ecx
+; X64-NEXT: addl %r8d, %ecx
+; X64-NEXT: movb %cl, -1(%rdx,%rax)
+; X64-NEXT: movzbl (%rdi,%rax), %r8d
+; X64-NEXT: movzbl (%rsi,%rax), %ecx
+; X64-NEXT: addl %r8d, %ecx
+; X64-NEXT: movb %cl, (%rdx,%rax)
+; X64-NEXT: addq $4, %rax
+; X64-NEXT: cmpl $403, %eax # imm = 0x193
+; X64-NEXT: jne .LBB3_1
+; X64-NEXT: # %bb.2: # %for.end
+; X64-NEXT: retq
+;
+; X32-LABEL: foldedidx:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl $-400, %eax # imm = 0xFE70
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB3_1: # %for.body
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movzbl 400(%esi,%eax), %edi
+; X32-NEXT: movzbl 400(%edx,%eax), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movb %bl, 400(%ecx,%eax)
+; X32-NEXT: movzbl 401(%esi,%eax), %edi
+; X32-NEXT: movzbl 401(%edx,%eax), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movb %bl, 401(%ecx,%eax)
+; X32-NEXT: movzbl 402(%esi,%eax), %edi
+; X32-NEXT: movzbl 402(%edx,%eax), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movb %bl, 402(%ecx,%eax)
+; X32-NEXT: movzbl 403(%esi,%eax), %edi
+; X32-NEXT: movzbl 403(%edx,%eax), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movb %bl, 403(%ecx,%eax)
+; X32-NEXT: addl $4, %eax
+; X32-NEXT: jne .LBB3_1
+; X32-NEXT: # %bb.2: # %for.end
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
entry:
br label %for.body
@@ -223,27 +435,49 @@ for.end:
; @multioper tests instructions with multiple IV user operands. We
; should be able to chain them independent of each other.
-;
-; X64: @multioper
-; X64: %for.body
-; X64: movl %{{.*}},4)
-; X64-NEXT: leal 1(
-; X64-NEXT: movl %{{.*}},4)
-; X64-NEXT: leal 2(
-; X64-NEXT: movl %{{.*}},4)
-; X64-NEXT: leal 3(
-; X64-NEXT: movl %{{.*}},4)
-;
-; X32: @multioper
-; X32: %for.body
-; X32: movl %{{.*}},4)
-; X32-NEXT: leal 1(
-; X32-NEXT: movl %{{.*}},4)
-; X32-NEXT: leal 2(
-; X32-NEXT: movl %{{.*}},4)
-; X32-NEXT: leal 3(
-; X32-NEXT: movl %{{.*}},4)
+
define void @multioper(i32* %a, i32 %n) nounwind {
+; X64-LABEL: multioper:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB4_1: # %for.body
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movl %eax, (%rdi,%rax,4)
+; X64-NEXT: leal 1(%rax), %ecx
+; X64-NEXT: movl %ecx, 4(%rdi,%rax,4)
+; X64-NEXT: leal 2(%rax), %ecx
+; X64-NEXT: movl %ecx, 8(%rdi,%rax,4)
+; X64-NEXT: leal 3(%rax), %ecx
+; X64-NEXT: movl %ecx, 12(%rdi,%rax,4)
+; X64-NEXT: addq $4, %rax
+; X64-NEXT: cmpl %esi, %eax
+; X64-NEXT: jl .LBB4_1
+; X64-NEXT: # %bb.2: # %exit
+; X64-NEXT: retq
+;
+; X32-LABEL: multioper:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB4_1: # %for.body
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movl %eax, (%edx,%eax,4)
+; X32-NEXT: leal 1(%eax), %esi
+; X32-NEXT: movl %esi, 4(%edx,%eax,4)
+; X32-NEXT: leal 2(%eax), %esi
+; X32-NEXT: movl %esi, 8(%edx,%eax,4)
+; X32-NEXT: leal 3(%eax), %esi
+; X32-NEXT: movl %esi, 12(%edx,%eax,4)
+; X32-NEXT: addl $4, %eax
+; X32-NEXT: cmpl %ecx, %eax
+; X32-NEXT: jl .LBB4_1
+; X32-NEXT: # %bb.2: # %exit
+; X32-NEXT: popl %esi
+; X32-NEXT: retl
entry:
br label %for.body
@@ -272,12 +506,51 @@ exit:
; @testCmpZero has a ICmpZero LSR use that should not be hidden from
; LSR. Profitable chains should have more than one nonzero increment
; anyway.
-;
-; X32: @testCmpZero
-; X32: %for.body82.us
-; X32: cmp
-; X32: jne
+
define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
+; X64-LABEL: testCmpZero:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movslq %edx, %rdx
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: movslq %ecx, %r9
+; X64-NEXT: addq %rsi, %r9
+; X64-NEXT: addl %edx, %r8d
+; X64-NEXT: movslq %r8d, %rcx
+; X64-NEXT: subq %rdx, %rcx
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB5_1: # %for.body82.us
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movzbl (%r9,%rdx,4), %eax
+; X64-NEXT: movb %al, (%rdi,%rdx)
+; X64-NEXT: incq %rdx
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: jne .LBB5_1
+; X64-NEXT: # %bb.2: # %return
+; X64-NEXT: retq
+;
+; X32-LABEL: testCmpZero:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: xorl %esi, %esi
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB5_1: # %for.body82.us
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movzbl (%edx,%esi,4), %ebx
+; X32-NEXT: movb %bl, (%ecx,%esi)
+; X32-NEXT: incl %esi
+; X32-NEXT: cmpl %esi, %eax
+; X32-NEXT: jne .LBB5_1
+; X32-NEXT: # %bb.2: # %return
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
entry:
%dest0 = getelementptr inbounds i8, i8* %src, i32 %srcidx
%source0 = getelementptr inbounds i8, i8* %dst, i32 %dstidx
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll?rev=323571&r1=323570&r2=323571&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll Fri Jan 26 14:06:07 2018
@@ -1,13 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
-; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -lsr-insns-cost | FileCheck %s
; OPT test checks that LSR optimize compare for static counter to compare with 0.
-; BOTH: for.body:
-; INSN: icmp eq i64 %lsr.iv.next, 0
-; REGS: icmp eq i64 %indvars.iv.next, 1024
-
; LLC test checks that LSR optimize compare for static counter.
; That means that instead of creating the following:
; movl %ecx, (%rdx,%rax,4)
@@ -20,17 +18,67 @@
; movl %ecx, 4096(%rdx,%rax,4)
; incq %rax
-; CHECK: LBB0_1:
-; CHECK-NEXT: movl 4096(%{{.+}},[[REG:%[0-9a-z]+]]
-; CHECK-NEXT: addl 4096(%{{.+}},[[REG]]
-; CHECK-NEXT: movl %{{.+}}, 4096(%{{.+}},[[REG]]
-; CHECK-NOT: cmp
-; CHECK: jne
-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-; Function Attrs: norecurse nounwind uwtable
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q) {
+; INSN-LABEL: @foo(
+; INSN-NEXT: entry:
+; INSN-NEXT: [[Q1:%.*]] = bitcast i32* [[Q:%.*]] to i8*
+; INSN-NEXT: [[Y3:%.*]] = bitcast i32* [[Y:%.*]] to i8*
+; INSN-NEXT: [[X7:%.*]] = bitcast i32* [[X:%.*]] to i8*
+; INSN-NEXT: br label [[FOR_BODY:%.*]]
+; INSN: for.cond.cleanup:
+; INSN-NEXT: ret void
+; INSN: for.body:
+; INSN-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -4096, [[ENTRY:%.*]] ]
+; INSN-NEXT: [[UGLYGEP8:%.*]] = getelementptr i8, i8* [[X7]], i64 [[LSR_IV]]
+; INSN-NEXT: [[UGLYGEP89:%.*]] = bitcast i8* [[UGLYGEP8]] to i32*
+; INSN-NEXT: [[SCEVGEP10:%.*]] = getelementptr i32, i32* [[UGLYGEP89]], i64 1024
+; INSN-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP10]], align 4
+; INSN-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[Y3]], i64 [[LSR_IV]]
+; INSN-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to i32*
+; INSN-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[UGLYGEP45]], i64 1024
+; INSN-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP6]], align 4
+; INSN-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]]
+; INSN-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Q1]], i64 [[LSR_IV]]
+; INSN-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i32*
+; INSN-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[UGLYGEP2]], i64 1024
+; INSN-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4
+; INSN-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4
+; INSN-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; INSN-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+; REGS-LABEL: @foo(
+; REGS-NEXT: entry:
+; REGS-NEXT: br label [[FOR_BODY:%.*]]
+; REGS: for.cond.cleanup:
+; REGS-NEXT: ret void
+; REGS: for.body:
+; REGS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; REGS-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[X:%.*]], i64 [[INDVARS_IV]]
+; REGS-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP2]], align 4
+; REGS-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[Y:%.*]], i64 [[INDVARS_IV]]
+; REGS-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
+; REGS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]]
+; REGS-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDVARS_IV]]
+; REGS-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4
+; REGS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; REGS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
+; REGS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_1: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movl 4096(%rsi,%rax), %ecx
+; CHECK-NEXT: addl 4096(%rdi,%rax), %ecx
+; CHECK-NEXT: movl %ecx, 4096(%rdx,%rax)
+; CHECK-NEXT: addq $4, %rax
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT: retq
entry:
br label %for.body
@@ -50,3 +98,4 @@ for.body:
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
+
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll?rev=323571&r1=323570&r2=323571&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll Fri Jan 26 14:06:07 2018
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
; Check when we use an outerloop induction variable inside of an innerloop
; induction value expr, LSR can still choose to use single induction variable
; for the innerloop and share it in multiple induction value exprs.
@@ -8,6 +9,46 @@ target datalayout = "e-m:e-i64:64-f80:12
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP215:%.*]] = icmp sgt i32 [[SIZE:%.*]], 1
+; CHECK-NEXT: [[T0:%.*]] = zext i32 [[SIZE]] to i64
+; CHECK-NEXT: [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[T0]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to i8*
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV13:%.*]] = inttoptr i64 [[LSR_IV1]] to i8*
+; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]]
+; CHECK: for.body2.preheader:
+; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
+; CHECK: for.body2:
+; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY2]] ], [ [[TMP0]], [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT: [[LSR_IV45:%.*]] = ptrtoint i8* [[LSR_IV4]] to i64
+; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
+; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[SCEVGEP8]], align 1
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[LSR_IV45]]
+; CHECK-NEXT: [[V2:%.*]] = load i8, i8* [[SCEVGEP7]], align 1
+; CHECK-NEXT: [[TMPV:%.*]] = xor i8 [[V1]], [[V2]]
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV13]], i64 [[LSR_IV45]]
+; CHECK-NEXT: store i8 [[TMPV]], i8* [[SCEVGEP6]], align 1
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
+; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_INC_LOOPEXIT:%.*]]
+; CHECK: for.inc.loopexit:
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
+; CHECK-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], [[T0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT3]], [[T1]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: ret void
+;
entry:
%cmp215 = icmp sgt i32 %size, 1
%t0 = zext i32 %size to i64
@@ -25,20 +66,6 @@ for.body2.preheader:
; Check LSR only generates two induction variables for for.body2 one for compare and
; one to shared by multiple array accesses.
-; CHECK: for.body2:
-; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
-; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
-; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
-; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
-; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
-; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
-; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
-; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
-; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
-; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
-; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
-; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
-; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
for.body2: ; preds = %for.body2.preheader, %for.body2
%indvars.iv = phi i64 [ 1, %for.body2.preheader ], [ %indvars.iv.next, %for.body2 ]
@@ -67,3 +94,4 @@ for.inc:
for.end.loopexit: ; preds = %for.inc
ret void
}
+