[llvm] r323571 - [x86] auto-generate complete checks; NFC
Sanjay Patel via llvm-commits
llvm-commits@lists.llvm.org
Fri Jan 26 14:06:07 PST 2018
Author: spatel
Date: Fri Jan 26 14:06:07 2018
New Revision: 323571
URL: http://llvm.org/viewvc/llvm-project?rev=323571&view=rev
Log:
[x86] auto-generate complete checks; NFC
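
For anyone regenerating checks like these locally: the assertions come from the
update scripts named in the NOTE lines added in the diffs below. A rough sketch
of the invocations (the --llc-binary/--opt-binary flags and the build/bin paths
are assumptions about a local build tree, not taken from this commit):

  # llc-driven test: rewrites the CHECK lines in place from the test's RUN lines
  utils/update_llc_test_checks.py --llc-binary=build/bin/llc \
      test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll

  # opt-driven test: same idea, but the checks are generated from opt's IR output
  utils/update_test_checks.py --opt-binary=build/bin/opt \
      test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll

Re-running the script after a codegen change updates the full expected output,
which is why hand-written partial checks are replaced wholesale below.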
Modified:
llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll?rev=323571&r1=323570&r2=323571&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll Fri Jan 26 14:06:07 2018
@@ -1,27 +1,70 @@
-; RUN: llc < %s -O3 -march=x86-64 -mcpu=core2 | FileCheck %s -check-prefix=X64
-; RUN: llc < %s -O3 -march=x86 -mcpu=core2 | FileCheck %s -check-prefix=X32
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -O3 -mtriple=x86_64-unknown-unknown -mcpu=core2 | FileCheck %s -check-prefix=X64
+; RUN: llc < %s -O3 -mtriple=i686-unknown-unknown -mcpu=core2 | FileCheck %s -check-prefix=X32
; @simple is the most basic chain of address induction variables. Chaining
; saves at least one register and avoids complex addressing and setup
; code.
;
-; X64: @simple
; %x * 4
-; X64: shlq $2
; no other address computation in the preheader
-; X64-NEXT: xorl
-; X64-NEXT: .p2align
-; X64: %loop
; no complex address modes
-; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
;
-; X32: @simple
; no expensive address computation in the preheader
-; X32-NOT: imul
-; X32: %loop
; no complex address modes
-; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
+
define i32 @simple(i32* %a, i32* %b, i32 %x) nounwind {
+; X64-LABEL: simple:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movslq %edx, %rcx
+; X64-NEXT: shlq $2, %rcx
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB0_1: # %loop
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: addl (%rdi), %eax
+; X64-NEXT: leaq (%rdi,%rcx), %r8
+; X64-NEXT: addl (%rdi,%rcx), %eax
+; X64-NEXT: leaq (%r8,%rcx), %rdx
+; X64-NEXT: addl (%rcx,%r8), %eax
+; X64-NEXT: addl (%rcx,%rdx), %eax
+; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: movq %rdx, %rdi
+; X64-NEXT: cmpq %rsi, %rdx
+; X64-NEXT: jne .LBB0_1
+; X64-NEXT: # %bb.2: # %exit
+; X64-NEXT: retq
+;
+; X32-LABEL: simple:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: shll $2, %edx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB0_1: # %loop
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: addl (%esi), %eax
+; X32-NEXT: leal (%esi,%edx), %edi
+; X32-NEXT: addl (%esi,%edx), %eax
+; X32-NEXT: leal (%edi,%edx), %ebx
+; X32-NEXT: addl (%edx,%edi), %eax
+; X32-NEXT: addl (%edx,%ebx), %eax
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: addl %edx, %ebx
+; X32-NEXT: movl %ebx, %esi
+; X32-NEXT: cmpl %ecx, %ebx
+; X32-NEXT: jne .LBB0_1
+; X32-NEXT: # %bb.2: # %exit
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
entry:
br label %loop
loop:
@@ -47,23 +90,60 @@ exit:
; @user is not currently chained because the IV is live across memory ops.
;
-; X64: @user
-; X64: shlq $4
-; X64: lea
-; X64: lea
-; X64: %loop
-; complex address modes
-; X64: (%{{[^)]+}},%{{[^)]+}},
-;
-; X32: @user
; expensive address computation in the preheader
-; X32: shll $4
-; X32: lea
-; X32: lea
-; X32: %loop
; complex address modes
-; X32: (%{{[^)]+}},%{{[^)]+}},
define i32 @user(i32* %a, i32* %b, i32 %x) nounwind {
+; X64-LABEL: user:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movslq %edx, %rcx
+; X64-NEXT: movq %rcx, %rdx
+; X64-NEXT: shlq $4, %rdx
+; X64-NEXT: leaq (,%rcx,4), %rax
+; X64-NEXT: leaq (%rax,%rax,2), %r8
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB1_1: # %loop
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: addl (%rdi), %eax
+; X64-NEXT: addl (%rdi,%rcx,4), %eax
+; X64-NEXT: addl (%rdi,%rcx,8), %eax
+; X64-NEXT: addl (%rdi,%r8), %eax
+; X64-NEXT: movl %eax, (%rdi)
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: cmpq %rdi, %rsi
+; X64-NEXT: jne .LBB1_1
+; X64-NEXT: # %bb.2: # %exit
+; X64-NEXT: retq
+;
+; X32-LABEL: user:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl %ecx, %edi
+; X32-NEXT: shll $4, %edi
+; X32-NEXT: leal (,%ecx,4), %eax
+; X32-NEXT: leal (%eax,%eax,2), %ebx
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB1_1: # %loop
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: addl (%esi), %eax
+; X32-NEXT: addl (%esi,%ecx,4), %eax
+; X32-NEXT: addl (%esi,%ecx,8), %eax
+; X32-NEXT: addl (%esi,%ebx), %eax
+; X32-NEXT: movl %eax, (%esi)
+; X32-NEXT: addl %edi, %esi
+; X32-NEXT: cmpl %esi, %edx
+; X32-NEXT: jne .LBB1_1
+; X32-NEXT: # %bb.2: # %exit
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
entry:
br label %loop
loop:
@@ -93,20 +173,94 @@ exit:
; used to do, and exactly what we don't want to do. LSR's new IV
; chaining feature should now undo the damage.
;
-; X64: extrastride:
; We currently don't handle this on X64 because the sexts cause
; strange increment expressions like this:
; IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
;
-; X32: extrastride:
-; no spills in the preheader
-; X32-NOT: mov{{.*}}(%esp){{$}}
-; X32: %for.body{{$}}
-; no complex address modes
-; X32-NOT: (%{{[^)]+}},%{{[^)]+}},
-; no reloads
-; X32-NOT: (%esp)
+; For x32, no spills in the preheader, no complex address modes, no reloads.
+
define void @extrastride(i8* nocapture %main, i32 %main_stride, i32* nocapture %res, i32 %x, i32 %y, i32 %z) nounwind {
+; X64-LABEL: extrastride:
+; X64: # %bb.0: # %entry
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %r14
+; X64-NEXT: pushq %rbx
+; X64-NEXT: # kill: def %ecx killed %ecx def %rcx
+; X64-NEXT: # kill: def %esi killed %esi def %rsi
+; X64-NEXT: testl %r9d, %r9d
+; X64-NEXT: je .LBB2_3
+; X64-NEXT: # %bb.1: # %for.body.lr.ph
+; X64-NEXT: leal (%rsi,%rsi), %r14d
+; X64-NEXT: leal (%rsi,%rsi,2), %ebx
+; X64-NEXT: addl %esi, %ecx
+; X64-NEXT: leal (,%rsi,4), %eax
+; X64-NEXT: leal (%rcx,%rsi,4), %ebp
+; X64-NEXT: movslq %eax, %r10
+; X64-NEXT: movslq %ebx, %r11
+; X64-NEXT: movslq %r14d, %rbx
+; X64-NEXT: movslq %esi, %rsi
+; X64-NEXT: movslq %r8d, %rcx
+; X64-NEXT: shlq $2, %rcx
+; X64-NEXT: movslq %ebp, %rax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB2_2: # %for.body
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movl (%rdi,%rsi), %ebp
+; X64-NEXT: addl (%rdi), %ebp
+; X64-NEXT: addl (%rdi,%rbx), %ebp
+; X64-NEXT: addl (%rdi,%r11), %ebp
+; X64-NEXT: addl (%rdi,%r10), %ebp
+; X64-NEXT: movl %ebp, (%rdx)
+; X64-NEXT: addq %rax, %rdi
+; X64-NEXT: addq %rcx, %rdx
+; X64-NEXT: decl %r9d
+; X64-NEXT: jne .LBB2_2
+; X64-NEXT: .LBB2_3: # %for.end
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %r14
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+;
+; X32-LABEL: extrastride:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebp
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: testl %eax, %eax
+; X32-NEXT: je .LBB2_3
+; X32-NEXT: # %bb.1: # %for.body.lr.ph
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ebx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edi
+; X32-NEXT: addl %esi, %edi
+; X32-NEXT: shll $2, %ecx
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB2_2: # %for.body
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movl (%ebx,%esi), %ebp
+; X32-NEXT: addl (%ebx), %ebp
+; X32-NEXT: leal (%ebx,%esi), %ebx
+; X32-NEXT: addl (%esi,%ebx), %ebp
+; X32-NEXT: leal (%ebx,%esi), %ebx
+; X32-NEXT: addl (%esi,%ebx), %ebp
+; X32-NEXT: leal (%ebx,%esi), %ebx
+; X32-NEXT: addl (%esi,%ebx), %ebp
+; X32-NEXT: movl %ebp, (%edx)
+; X32-NEXT: leal (%ebx,%esi), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: addl %ecx, %edx
+; X32-NEXT: decl %eax
+; X32-NEXT: jne .LBB2_2
+; X32-NEXT: .LBB2_3: # %for.end
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: popl %ebp
+; X32-NEXT: retl
entry:
%cmp8 = icmp eq i32 %z, 0
br i1 %cmp8, label %for.end, label %for.body.lr.ph
@@ -158,13 +312,71 @@ for.end:
; }
; where 's' can be folded into the addressing mode.
; Consequently, we should *not* form any chains.
-;
-; X64: foldedidx:
-; X64: movzbl -3(
-;
-; X32: foldedidx:
-; X32: movzbl 400(
+
define void @foldedidx(i8* nocapture %a, i8* nocapture %b, i8* nocapture %c) nounwind ssp {
+; X64-LABEL: foldedidx:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movl $3, %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB3_1: # %for.body
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movzbl -3(%rdi,%rax), %r8d
+; X64-NEXT: movzbl -3(%rsi,%rax), %ecx
+; X64-NEXT: addl %r8d, %ecx
+; X64-NEXT: movb %cl, -3(%rdx,%rax)
+; X64-NEXT: movzbl -2(%rdi,%rax), %r8d
+; X64-NEXT: movzbl -2(%rsi,%rax), %ecx
+; X64-NEXT: addl %r8d, %ecx
+; X64-NEXT: movb %cl, -2(%rdx,%rax)
+; X64-NEXT: movzbl -1(%rdi,%rax), %r8d
+; X64-NEXT: movzbl -1(%rsi,%rax), %ecx
+; X64-NEXT: addl %r8d, %ecx
+; X64-NEXT: movb %cl, -1(%rdx,%rax)
+; X64-NEXT: movzbl (%rdi,%rax), %r8d
+; X64-NEXT: movzbl (%rsi,%rax), %ecx
+; X64-NEXT: addl %r8d, %ecx
+; X64-NEXT: movb %cl, (%rdx,%rax)
+; X64-NEXT: addq $4, %rax
+; X64-NEXT: cmpl $403, %eax # imm = 0x193
+; X64-NEXT: jne .LBB3_1
+; X64-NEXT: # %bb.2: # %for.end
+; X64-NEXT: retq
+;
+; X32-LABEL: foldedidx:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %edi
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl $-400, %eax # imm = 0xFE70
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB3_1: # %for.body
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movzbl 400(%esi,%eax), %edi
+; X32-NEXT: movzbl 400(%edx,%eax), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movb %bl, 400(%ecx,%eax)
+; X32-NEXT: movzbl 401(%esi,%eax), %edi
+; X32-NEXT: movzbl 401(%edx,%eax), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movb %bl, 401(%ecx,%eax)
+; X32-NEXT: movzbl 402(%esi,%eax), %edi
+; X32-NEXT: movzbl 402(%edx,%eax), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movb %bl, 402(%ecx,%eax)
+; X32-NEXT: movzbl 403(%esi,%eax), %edi
+; X32-NEXT: movzbl 403(%edx,%eax), %ebx
+; X32-NEXT: addl %edi, %ebx
+; X32-NEXT: movb %bl, 403(%ecx,%eax)
+; X32-NEXT: addl $4, %eax
+; X32-NEXT: jne .LBB3_1
+; X32-NEXT: # %bb.2: # %for.end
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %edi
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
entry:
br label %for.body
@@ -223,27 +435,49 @@ for.end:
; @multioper tests instructions with multiple IV user operands. We
; should be able to chain them independent of each other.
-;
-; X64: @multioper
-; X64: %for.body
-; X64: movl %{{.*}},4)
-; X64-NEXT: leal 1(
-; X64-NEXT: movl %{{.*}},4)
-; X64-NEXT: leal 2(
-; X64-NEXT: movl %{{.*}},4)
-; X64-NEXT: leal 3(
-; X64-NEXT: movl %{{.*}},4)
-;
-; X32: @multioper
-; X32: %for.body
-; X32: movl %{{.*}},4)
-; X32-NEXT: leal 1(
-; X32-NEXT: movl %{{.*}},4)
-; X32-NEXT: leal 2(
-; X32-NEXT: movl %{{.*}},4)
-; X32-NEXT: leal 3(
-; X32-NEXT: movl %{{.*}},4)
+
define void @multioper(i32* %a, i32 %n) nounwind {
+; X64-LABEL: multioper:
+; X64: # %bb.0: # %entry
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB4_1: # %for.body
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movl %eax, (%rdi,%rax,4)
+; X64-NEXT: leal 1(%rax), %ecx
+; X64-NEXT: movl %ecx, 4(%rdi,%rax,4)
+; X64-NEXT: leal 2(%rax), %ecx
+; X64-NEXT: movl %ecx, 8(%rdi,%rax,4)
+; X64-NEXT: leal 3(%rax), %ecx
+; X64-NEXT: movl %ecx, 12(%rdi,%rax,4)
+; X64-NEXT: addq $4, %rax
+; X64-NEXT: cmpl %esi, %eax
+; X64-NEXT: jl .LBB4_1
+; X64-NEXT: # %bb.2: # %exit
+; X64-NEXT: retq
+;
+; X32-LABEL: multioper:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %esi
+; X32-NEXT: xorl %eax, %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB4_1: # %for.body
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movl %eax, (%edx,%eax,4)
+; X32-NEXT: leal 1(%eax), %esi
+; X32-NEXT: movl %esi, 4(%edx,%eax,4)
+; X32-NEXT: leal 2(%eax), %esi
+; X32-NEXT: movl %esi, 8(%edx,%eax,4)
+; X32-NEXT: leal 3(%eax), %esi
+; X32-NEXT: movl %esi, 12(%edx,%eax,4)
+; X32-NEXT: addl $4, %eax
+; X32-NEXT: cmpl %ecx, %eax
+; X32-NEXT: jl .LBB4_1
+; X32-NEXT: # %bb.2: # %exit
+; X32-NEXT: popl %esi
+; X32-NEXT: retl
entry:
br label %for.body
@@ -272,12 +506,51 @@ exit:
; @testCmpZero has a ICmpZero LSR use that should not be hidden from
; LSR. Profitable chains should have more than one nonzero increment
; anyway.
-;
-; X32: @testCmpZero
-; X32: %for.body82.us
-; X32: cmp
-; X32: jne
+
define void @testCmpZero(i8* %src, i8* %dst, i32 %srcidx, i32 %dstidx, i32 %len) nounwind ssp {
+; X64-LABEL: testCmpZero:
+; X64: # %bb.0: # %entry
+; X64-NEXT: movslq %edx, %rdx
+; X64-NEXT: addq %rdx, %rdi
+; X64-NEXT: movslq %ecx, %r9
+; X64-NEXT: addq %rsi, %r9
+; X64-NEXT: addl %edx, %r8d
+; X64-NEXT: movslq %r8d, %rcx
+; X64-NEXT: subq %rdx, %rcx
+; X64-NEXT: xorl %edx, %edx
+; X64-NEXT: .p2align 4, 0x90
+; X64-NEXT: .LBB5_1: # %for.body82.us
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: movzbl (%r9,%rdx,4), %eax
+; X64-NEXT: movb %al, (%rdi,%rdx)
+; X64-NEXT: incq %rdx
+; X64-NEXT: cmpq %rdx, %rcx
+; X64-NEXT: jne .LBB5_1
+; X64-NEXT: # %bb.2: # %return
+; X64-NEXT: retq
+;
+; X32-LABEL: testCmpZero:
+; X32: # %bb.0: # %entry
+; X32-NEXT: pushl %ebx
+; X32-NEXT: pushl %esi
+; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X32-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: addl {{[0-9]+}}(%esp), %edx
+; X32-NEXT: xorl %esi, %esi
+; X32-NEXT: .p2align 4, 0x90
+; X32-NEXT: .LBB5_1: # %for.body82.us
+; X32-NEXT: # =>This Inner Loop Header: Depth=1
+; X32-NEXT: movzbl (%edx,%esi,4), %ebx
+; X32-NEXT: movb %bl, (%ecx,%esi)
+; X32-NEXT: incl %esi
+; X32-NEXT: cmpl %esi, %eax
+; X32-NEXT: jne .LBB5_1
+; X32-NEXT: # %bb.2: # %return
+; X32-NEXT: popl %esi
+; X32-NEXT: popl %ebx
+; X32-NEXT: retl
entry:
%dest0 = getelementptr inbounds i8, i8* %src, i32 %srcidx
%source0 = getelementptr inbounds i8, i8* %dst, i32 %dstidx
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll?rev=323571&r1=323570&r2=323571&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/lsr-insns-1.ll Fri Jan 26 14:06:07 2018
@@ -1,13 +1,11 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -S | FileCheck %s -check-prefix=BOTH -check-prefix=INSN
; RUN: opt < %s -loop-reduce -mtriple=x86_64 -lsr-insns-cost=false -S | FileCheck %s -check-prefix=BOTH -check-prefix=REGS
-; RUN: llc < %s -O2 -march=x86-64 -lsr-insns-cost -asm-verbose=0 | FileCheck %s
+; RUN: llc < %s -O2 -mtriple=x86_64-unknown-unknown -lsr-insns-cost | FileCheck %s
; OPT test checks that LSR optimize compare for static counter to compare with 0.
-; BOTH: for.body:
-; INSN: icmp eq i64 %lsr.iv.next, 0
-; REGS: icmp eq i64 %indvars.iv.next, 1024
-
; LLC test checks that LSR optimize compare for static counter.
; That means that instead of creating the following:
; movl %ecx, (%rdx,%rax,4)
@@ -20,17 +18,67 @@
; movl %ecx, 4096(%rdx,%rax,4)
; incq %rax
-; CHECK: LBB0_1:
-; CHECK-NEXT: movl 4096(%{{.+}},[[REG:%[0-9a-z]+]]
-; CHECK-NEXT: addl 4096(%{{.+}},[[REG]]
-; CHECK-NEXT: movl %{{.+}}, 4096(%{{.+}},[[REG]]
-; CHECK-NOT: cmp
-; CHECK: jne
-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-; Function Attrs: norecurse nounwind uwtable
define void @foo(i32* nocapture readonly %x, i32* nocapture readonly %y, i32* nocapture %q) {
+; INSN-LABEL: @foo(
+; INSN-NEXT: entry:
+; INSN-NEXT: [[Q1:%.*]] = bitcast i32* [[Q:%.*]] to i8*
+; INSN-NEXT: [[Y3:%.*]] = bitcast i32* [[Y:%.*]] to i8*
+; INSN-NEXT: [[X7:%.*]] = bitcast i32* [[X:%.*]] to i8*
+; INSN-NEXT: br label [[FOR_BODY:%.*]]
+; INSN: for.cond.cleanup:
+; INSN-NEXT: ret void
+; INSN: for.body:
+; INSN-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY]] ], [ -4096, [[ENTRY:%.*]] ]
+; INSN-NEXT: [[UGLYGEP8:%.*]] = getelementptr i8, i8* [[X7]], i64 [[LSR_IV]]
+; INSN-NEXT: [[UGLYGEP89:%.*]] = bitcast i8* [[UGLYGEP8]] to i32*
+; INSN-NEXT: [[SCEVGEP10:%.*]] = getelementptr i32, i32* [[UGLYGEP89]], i64 1024
+; INSN-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP10]], align 4
+; INSN-NEXT: [[UGLYGEP4:%.*]] = getelementptr i8, i8* [[Y3]], i64 [[LSR_IV]]
+; INSN-NEXT: [[UGLYGEP45:%.*]] = bitcast i8* [[UGLYGEP4]] to i32*
+; INSN-NEXT: [[SCEVGEP6:%.*]] = getelementptr i32, i32* [[UGLYGEP45]], i64 1024
+; INSN-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP6]], align 4
+; INSN-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]]
+; INSN-NEXT: [[UGLYGEP:%.*]] = getelementptr i8, i8* [[Q1]], i64 [[LSR_IV]]
+; INSN-NEXT: [[UGLYGEP2:%.*]] = bitcast i8* [[UGLYGEP]] to i32*
+; INSN-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[UGLYGEP2]], i64 1024
+; INSN-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4
+; INSN-NEXT: [[LSR_IV_NEXT]] = add nsw i64 [[LSR_IV]], 4
+; INSN-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[LSR_IV_NEXT]], 0
+; INSN-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+; REGS-LABEL: @foo(
+; REGS-NEXT: entry:
+; REGS-NEXT: br label [[FOR_BODY:%.*]]
+; REGS: for.cond.cleanup:
+; REGS-NEXT: ret void
+; REGS: for.body:
+; REGS-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
+; REGS-NEXT: [[SCEVGEP2:%.*]] = getelementptr i32, i32* [[X:%.*]], i64 [[INDVARS_IV]]
+; REGS-NEXT: [[TMP:%.*]] = load i32, i32* [[SCEVGEP2]], align 4
+; REGS-NEXT: [[SCEVGEP1:%.*]] = getelementptr i32, i32* [[Y:%.*]], i64 [[INDVARS_IV]]
+; REGS-NEXT: [[TMP1:%.*]] = load i32, i32* [[SCEVGEP1]], align 4
+; REGS-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], [[TMP]]
+; REGS-NEXT: [[SCEVGEP:%.*]] = getelementptr i32, i32* [[Q:%.*]], i64 [[INDVARS_IV]]
+; REGS-NEXT: store i32 [[ADD]], i32* [[SCEVGEP]], align 4
+; REGS-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; REGS-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1024
+; REGS-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_BODY]]
+;
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq $-4096, %rax # imm = 0xF000
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_1: # %for.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movl 4096(%rsi,%rax), %ecx
+; CHECK-NEXT: addl 4096(%rdi,%rax), %ecx
+; CHECK-NEXT: movl %ecx, 4096(%rdx,%rax)
+; CHECK-NEXT: addq $4, %rax
+; CHECK-NEXT: jne .LBB0_1
+; CHECK-NEXT: # %bb.2: # %for.cond.cleanup
+; CHECK-NEXT: retq
entry:
br label %for.body
@@ -50,3 +98,4 @@ for.body:
%exitcond = icmp eq i64 %indvars.iv.next, 1024
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
+
Modified: llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll?rev=323571&r1=323570&r2=323571&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll (original)
+++ llvm/trunk/test/Transforms/LoopStrengthReduce/X86/nested-loop.ll Fri Jan 26 14:06:07 2018
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-reduce -S < %s | FileCheck %s
+
; Check when we use an outerloop induction variable inside of an innerloop
; induction value expr, LSR can still choose to use single induction variable
; for the innerloop and share it in multiple induction value exprs.
@@ -8,6 +9,46 @@ target datalayout = "e-m:e-i64:64-f80:12
target triple = "x86_64-unknown-linux-gnu"
define void @foo(i32 %size, i32 %nsteps, i32 %hsize, i32* %lined, i8* %maxarray) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP215:%.*]] = icmp sgt i32 [[SIZE:%.*]], 1
+; CHECK-NEXT: [[T0:%.*]] = zext i32 [[SIZE]] to i64
+; CHECK-NEXT: [[T1:%.*]] = sext i32 [[NSTEPS:%.*]] to i64
+; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[T0]], -1
+; CHECK-NEXT: [[TMP1:%.*]] = inttoptr i64 [[TMP0]] to i8*
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: [[LSR_IV1:%.*]] = phi i64 [ [[LSR_IV_NEXT2:%.*]], [[FOR_INC:%.*]] ], [ 1, [[ENTRY:%.*]] ]
+; CHECK-NEXT: [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC]] ], [ 0, [[ENTRY]] ]
+; CHECK-NEXT: [[LSR_IV13:%.*]] = inttoptr i64 [[LSR_IV1]] to i8*
+; CHECK-NEXT: br i1 [[CMP215]], label [[FOR_BODY2_PREHEADER:%.*]], label [[FOR_INC]]
+; CHECK: for.body2.preheader:
+; CHECK-NEXT: br label [[FOR_BODY2:%.*]]
+; CHECK: for.body2:
+; CHECK-NEXT: [[LSR_IV4:%.*]] = phi i8* [ [[SCEVGEP:%.*]], [[FOR_BODY2]] ], [ [[MAXARRAY:%.*]], [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT: [[LSR_IV:%.*]] = phi i64 [ [[LSR_IV_NEXT:%.*]], [[FOR_BODY2]] ], [ [[TMP0]], [[FOR_BODY2_PREHEADER]] ]
+; CHECK-NEXT: [[LSR_IV45:%.*]] = ptrtoint i8* [[LSR_IV4]] to i64
+; CHECK-NEXT: [[SCEVGEP8:%.*]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
+; CHECK-NEXT: [[V1:%.*]] = load i8, i8* [[SCEVGEP8]], align 1
+; CHECK-NEXT: [[SCEVGEP7:%.*]] = getelementptr i8, i8* [[TMP1]], i64 [[LSR_IV45]]
+; CHECK-NEXT: [[V2:%.*]] = load i8, i8* [[SCEVGEP7]], align 1
+; CHECK-NEXT: [[TMPV:%.*]] = xor i8 [[V1]], [[V2]]
+; CHECK-NEXT: [[SCEVGEP6:%.*]] = getelementptr i8, i8* [[LSR_IV13]], i64 [[LSR_IV45]]
+; CHECK-NEXT: store i8 [[TMPV]], i8* [[SCEVGEP6]], align 1
+; CHECK-NEXT: [[LSR_IV_NEXT]] = add i64 [[LSR_IV]], -1
+; CHECK-NEXT: [[SCEVGEP]] = getelementptr i8, i8* [[LSR_IV4]], i64 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp ne i64 [[LSR_IV_NEXT]], 0
+; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_BODY2]], label [[FOR_INC_LOOPEXIT:%.*]]
+; CHECK: for.inc.loopexit:
+; CHECK-NEXT: br label [[FOR_INC]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
+; CHECK-NEXT: [[LSR_IV_NEXT2]] = add nuw nsw i64 [[LSR_IV1]], [[T0]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[INDVARS_IV_NEXT3]], [[T1]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_END_LOOPEXIT:%.*]]
+; CHECK: for.end.loopexit:
+; CHECK-NEXT: ret void
+;
entry:
%cmp215 = icmp sgt i32 %size, 1
%t0 = zext i32 %size to i64
@@ -25,20 +66,6 @@ for.body2.preheader:
; Check LSR only generates two induction variables for for.body2 one for compare and
; one to shared by multiple array accesses.
-; CHECK: for.body2:
-; CHECK-NEXT: [[LSRAR:%[^,]+]] = phi i8* [ %scevgep, %for.body2 ], [ %maxarray, %for.body2.preheader ]
-; CHECK-NEXT: [[LSR:%[^,]+]] = phi i64 [ %lsr.iv.next, %for.body2 ], [ %0, %for.body2.preheader ]
-; CHECK-NOT: = phi i64 [ {{.*}}, %for.body2 ], [ {{.*}}, %for.body2.preheader ]
-; CHECK: [[LSRINT:%[^,]+]] = ptrtoint i8* [[LSRAR]] to i64
-; CHECK: [[SCEVGEP1:%[^,]+]] = getelementptr i8, i8* [[LSRAR]], i64 1
-; CHECK: {{.*}} = load i8, i8* [[SCEVGEP1]], align 1
-; CHECK: [[SCEVGEP2:%[^,]+]] = getelementptr i8, i8* %1, i64 [[LSRINT]]
-; CHECK: {{.*}} = load i8, i8* [[SCEVGEP2]], align 1
-; CHECK: [[SCEVGEP3:%[^,]+]] = getelementptr i8, i8* {{.*}}, i64 [[LSRINT]]
-; CHECK: store i8 {{.*}}, i8* [[SCEVGEP3]], align 1
-; CHECK: [[LSRNEXT:%[^,]+]] = add i64 [[LSR]], -1
-; CHECK: %exitcond = icmp ne i64 [[LSRNEXT]], 0
-; CHECK: br i1 %exitcond, label %for.body2, label %for.inc.loopexit
for.body2: ; preds = %for.body2.preheader, %for.body2
%indvars.iv = phi i64 [ 1, %for.body2.preheader ], [ %indvars.iv.next, %for.body2 ]
@@ -67,3 +94,4 @@ for.inc:
for.end.loopexit: ; preds = %for.inc
ret void
}
+