[llvm] r347195 - [X86] Add codegen tests for slow-shld scalar funnel shifts
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 19 04:29:41 PST 2018
Author: rksimon
Date: Mon Nov 19 04:29:41 2018
New Revision: 347195
URL: http://llvm.org/viewvc/llvm-project?rev=347195&view=rev
Log:
[X86] Add codegen tests for slow-shld scalar funnel shifts
Modified:
llvm/trunk/test/CodeGen/X86/fshl.ll
llvm/trunk/test/CodeGen/X86/fshr.ll
Modified: llvm/trunk/test/CodeGen/X86/fshl.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fshl.ll?rev=347195&r1=347194&r2=347195&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fshl.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fshl.ll Mon Nov 19 04:29:41 2018
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW
declare i8 @llvm.fshl.i8(i8, i8, i8) nounwind readnone
declare i16 @llvm.fshl.i16(i16, i16, i16) nounwind readnone
@@ -54,65 +56,149 @@ define i8 @var_shift_i8(i8 %x, i8 %y, i8
}
define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
-; X86-LABEL: var_shift_i16:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shldw %cl, %si, %dx
-; X86-NEXT: testw %cx, %cx
-; X86-NEXT: je .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB1_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-;
-; X64-LABEL: var_shift_i16:
-; X64: # %bb.0:
-; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shldw %cl, %si, %ax
-; X64-NEXT: testw %cx, %cx
-; X64-NEXT: cmovel %edi, %eax
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: retq
+; X86-FAST-LABEL: var_shift_i16:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: pushl %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: andl $15, %ecx
+; X86-FAST-NEXT: movl %eax, %edx
+; X86-FAST-NEXT: shldw %cl, %si, %dx
+; X86-FAST-NEXT: testw %cx, %cx
+; X86-FAST-NEXT: je .LBB1_2
+; X86-FAST-NEXT: # %bb.1:
+; X86-FAST-NEXT: movl %edx, %eax
+; X86-FAST-NEXT: .LBB1_2:
+; X86-FAST-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: var_shift_i16:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %edi
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: andl $15, %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: movl $16, %ecx
+; X86-SLOW-NEXT: subl %edx, %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: testw %dx, %dx
+; X86-SLOW-NEXT: je .LBB1_2
+; X86-SLOW-NEXT: # %bb.1:
+; X86-SLOW-NEXT: orl %esi, %edi
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: .LBB1_2:
+; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: popl %edi
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: var_shift_i16:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edx, %ecx
+; X64-FAST-NEXT: andl $15, %ecx
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: shldw %cl, %si, %ax
+; X64-FAST-NEXT: testw %cx, %cx
+; X64-FAST-NEXT: cmovel %edi, %eax
+; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: var_shift_i16:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movzwl %si, %eax
+; X64-SLOW-NEXT: andl $15, %edx
+; X64-SLOW-NEXT: movl %edi, %esi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: shll %cl, %esi
+; X64-SLOW-NEXT: movl $16, %ecx
+; X64-SLOW-NEXT: subl %edx, %ecx
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: shrl %cl, %eax
+; X64-SLOW-NEXT: orl %esi, %eax
+; X64-SLOW-NEXT: testw %dx, %dx
+; X64-SLOW-NEXT: cmovel %edi, %eax
+; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-SLOW-NEXT: retq
%tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 %z)
ret i16 %tmp
}
define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
-; X86-LABEL: var_shift_i32:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andl $31, %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shldl %cl, %esi, %edx
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: je .LBB2_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB2_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-;
-; X64-LABEL: var_shift_i32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: andl $31, %ecx
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shldl %cl, %esi, %eax
-; X64-NEXT: testl %ecx, %ecx
-; X64-NEXT: cmovel %edi, %eax
-; X64-NEXT: retq
+; X86-FAST-LABEL: var_shift_i32:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: pushl %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: andl $31, %ecx
+; X86-FAST-NEXT: movl %eax, %edx
+; X86-FAST-NEXT: shldl %cl, %esi, %edx
+; X86-FAST-NEXT: testl %ecx, %ecx
+; X86-FAST-NEXT: je .LBB2_2
+; X86-FAST-NEXT: # %bb.1:
+; X86-FAST-NEXT: movl %edx, %eax
+; X86-FAST-NEXT: .LBB2_2:
+; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: var_shift_i32:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %edi
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: andl $31, %edx
+; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shll %cl, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: negl %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shrl %cl, %esi
+; X86-SLOW-NEXT: testl %edx, %edx
+; X86-SLOW-NEXT: je .LBB2_2
+; X86-SLOW-NEXT: # %bb.1:
+; X86-SLOW-NEXT: orl %esi, %edi
+; X86-SLOW-NEXT: movl %edi, %eax
+; X86-SLOW-NEXT: .LBB2_2:
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: popl %edi
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: var_shift_i32:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edx, %ecx
+; X64-FAST-NEXT: andl $31, %ecx
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: shldl %cl, %esi, %eax
+; X64-FAST-NEXT: testl %ecx, %ecx
+; X64-FAST-NEXT: cmovel %edi, %eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: var_shift_i32:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movl %esi, %eax
+; X64-SLOW-NEXT: andl $31, %edx
+; X64-SLOW-NEXT: movl %edi, %esi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: shll %cl, %esi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: negl %ecx
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: shrl %cl, %eax
+; X64-SLOW-NEXT: orl %esi, %eax
+; X64-SLOW-NEXT: testl %edx, %edx
+; X64-SLOW-NEXT: cmovel %edi, %eax
+; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp
}
@@ -174,15 +260,31 @@ define i64 @var_shift_i64(i64 %x, i64 %y
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
-; X64-LABEL: var_shift_i64:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: andl $63, %ecx
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shldq %cl, %rsi, %rax
-; X64-NEXT: testq %rcx, %rcx
-; X64-NEXT: cmoveq %rdi, %rax
-; X64-NEXT: retq
+; X64-FAST-LABEL: var_shift_i64:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movq %rdx, %rcx
+; X64-FAST-NEXT: andl $63, %ecx
+; X64-FAST-NEXT: movq %rdi, %rax
+; X64-FAST-NEXT: shldq %cl, %rsi, %rax
+; X64-FAST-NEXT: testq %rcx, %rcx
+; X64-FAST-NEXT: cmoveq %rdi, %rax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: var_shift_i64:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movq %rsi, %rax
+; X64-SLOW-NEXT: andl $63, %edx
+; X64-SLOW-NEXT: movq %rdi, %rsi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: shlq %cl, %rsi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: negl %ecx
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: shrq %cl, %rax
+; X64-SLOW-NEXT: orq %rsi, %rax
+; X64-SLOW-NEXT: testq %rdx, %rdx
+; X64-SLOW-NEXT: cmoveq %rdi, %rax
+; X64-SLOW-NEXT: retq
%tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
ret i64 %tmp
}
@@ -214,55 +316,115 @@ define i8 @const_shift_i8(i8 %x, i8 %y)
}
define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
-; X86-LABEL: const_shift_i16:
-; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shldw $7, %cx, %ax
-; X86-NEXT: retl
-;
-; X64-LABEL: const_shift_i16:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shldw $7, %si, %ax
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: retq
+; X86-FAST-LABEL: const_shift_i16:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: shldw $7, %cx, %ax
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: const_shift_i16:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: shrl $9, %ecx
+; X86-SLOW-NEXT: shll $7, %eax
+; X86-SLOW-NEXT: orl %ecx, %eax
+; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: const_shift_i16:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: shldw $7, %si, %ax
+; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: const_shift_i16:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movzwl %si, %eax
+; X64-SLOW-NEXT: shll $7, %edi
+; X64-SLOW-NEXT: shrl $9, %eax
+; X64-SLOW-NEXT: orl %edi, %eax
+; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-SLOW-NEXT: retq
%tmp = tail call i16 @llvm.fshl.i16(i16 %x, i16 %y, i16 7)
ret i16 %tmp
}
define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: const_shift_i32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shldl $7, %ecx, %eax
-; X86-NEXT: retl
-;
-; X64-LABEL: const_shift_i32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shldl $7, %esi, %eax
-; X64-NEXT: retq
+; X86-FAST-LABEL: const_shift_i32:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: shldl $7, %ecx, %eax
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: const_shift_i32:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: shrl $25, %ecx
+; X86-SLOW-NEXT: shll $7, %eax
+; X86-SLOW-NEXT: orl %ecx, %eax
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: const_shift_i32:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: shldl $7, %esi, %eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: const_shift_i32:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLOW-NEXT: shrl $25, %esi
+; X64-SLOW-NEXT: shll $7, %edi
+; X64-SLOW-NEXT: leal (%rdi,%rsi), %eax
+; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 7)
ret i32 %tmp
}
define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
-; X86-LABEL: const_shift_i64:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shrdl $25, %ecx, %eax
-; X86-NEXT: shldl $7, %ecx, %edx
-; X86-NEXT: retl
-;
-; X64-LABEL: const_shift_i64:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shldq $7, %rsi, %rax
-; X64-NEXT: retq
+; X86-FAST-LABEL: const_shift_i64:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: shrdl $25, %ecx, %eax
+; X86-FAST-NEXT: shldl $7, %ecx, %edx
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: const_shift_i64:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: shrl $25, %esi
+; X86-SLOW-NEXT: movl %ecx, %eax
+; X86-SLOW-NEXT: shll $7, %eax
+; X86-SLOW-NEXT: orl %esi, %eax
+; X86-SLOW-NEXT: shrl $25, %ecx
+; X86-SLOW-NEXT: shll $7, %edx
+; X86-SLOW-NEXT: orl %ecx, %edx
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: const_shift_i64:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movq %rdi, %rax
+; X64-FAST-NEXT: shldq $7, %rsi, %rax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: const_shift_i64:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: shrq $57, %rsi
+; X64-SLOW-NEXT: shlq $7, %rdi
+; X64-SLOW-NEXT: leaq (%rdi,%rsi), %rax
+; X64-SLOW-NEXT: retq
%tmp = tail call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 7)
ret i64 %tmp
}
Modified: llvm/trunk/test/CodeGen/X86/fshr.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fshr.ll?rev=347195&r1=347194&r2=347195&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fshr.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fshr.ll Mon Nov 19 04:29:41 2018
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X86
-; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X64
+; RUN: llc < %s -mtriple=i686-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X86,X86-FAST
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=CHECK,X86,X86-SLOW
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefixes=CHECK,X64,X64-FAST
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+slow-shld | FileCheck %s --check-prefixes=CHECK,X64,X64-SLOW
declare i8 @llvm.fshr.i8(i8, i8, i8) nounwind readnone
declare i16 @llvm.fshr.i16(i16, i16, i16) nounwind readnone
@@ -54,65 +56,148 @@ define i8 @var_shift_i8(i8 %x, i8 %y, i8
}
define i16 @var_shift_i16(i16 %x, i16 %y, i16 %z) nounwind {
-; X86-LABEL: var_shift_i16:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andl $15, %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrdw %cl, %si, %dx
-; X86-NEXT: testw %cx, %cx
-; X86-NEXT: je .LBB1_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB1_2:
-; X86-NEXT: # kill: def $ax killed $ax killed $eax
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-;
-; X64-LABEL: var_shift_i16:
-; X64: # %bb.0:
-; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: andl $15, %ecx
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: shrdw %cl, %di, %ax
-; X64-NEXT: testw %cx, %cx
-; X64-NEXT: cmovel %esi, %eax
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: retq
+; X86-FAST-LABEL: var_shift_i16:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: pushl %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: andl $15, %ecx
+; X86-FAST-NEXT: movl %eax, %edx
+; X86-FAST-NEXT: shrdw %cl, %si, %dx
+; X86-FAST-NEXT: testw %cx, %cx
+; X86-FAST-NEXT: je .LBB1_2
+; X86-FAST-NEXT: # %bb.1:
+; X86-FAST-NEXT: movl %edx, %eax
+; X86-FAST-NEXT: .LBB1_2:
+; X86-FAST-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: var_shift_i16:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %edi
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: andl $15, %edx
+; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: movl $16, %ecx
+; X86-SLOW-NEXT: subl %edx, %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %esi
+; X86-SLOW-NEXT: testw %dx, %dx
+; X86-SLOW-NEXT: je .LBB1_2
+; X86-SLOW-NEXT: # %bb.1:
+; X86-SLOW-NEXT: orl %edi, %esi
+; X86-SLOW-NEXT: movl %esi, %eax
+; X86-SLOW-NEXT: .LBB1_2:
+; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: popl %edi
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: var_shift_i16:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edx, %ecx
+; X64-FAST-NEXT: andl $15, %ecx
+; X64-FAST-NEXT: movl %esi, %eax
+; X64-FAST-NEXT: shrdw %cl, %di, %ax
+; X64-FAST-NEXT: testw %cx, %cx
+; X64-FAST-NEXT: cmovel %esi, %eax
+; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: var_shift_i16:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movzwl %si, %eax
+; X64-SLOW-NEXT: andl $15, %edx
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: shrl %cl, %eax
+; X64-SLOW-NEXT: movl $16, %ecx
+; X64-SLOW-NEXT: subl %edx, %ecx
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: shll %cl, %edi
+; X64-SLOW-NEXT: orl %edi, %eax
+; X64-SLOW-NEXT: testw %dx, %dx
+; X64-SLOW-NEXT: cmovel %esi, %eax
+; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-SLOW-NEXT: retq
%tmp = tail call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 %z)
ret i16 %tmp
}
define i32 @var_shift_i32(i32 %x, i32 %y, i32 %z) nounwind {
-; X86-LABEL: var_shift_i32:
-; X86: # %bb.0:
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: andl $31, %ecx
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrdl %cl, %esi, %edx
-; X86-NEXT: testl %ecx, %ecx
-; X86-NEXT: je .LBB2_2
-; X86-NEXT: # %bb.1:
-; X86-NEXT: movl %edx, %eax
-; X86-NEXT: .LBB2_2:
-; X86-NEXT: popl %esi
-; X86-NEXT: retl
-;
-; X64-LABEL: var_shift_i32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edx, %ecx
-; X64-NEXT: andl $31, %ecx
-; X64-NEXT: movl %esi, %eax
-; X64-NEXT: shrdl %cl, %edi, %eax
-; X64-NEXT: testl %ecx, %ecx
-; X64-NEXT: cmovel %esi, %eax
-; X64-NEXT: retq
+; X86-FAST-LABEL: var_shift_i32:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: pushl %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: andl $31, %ecx
+; X86-FAST-NEXT: movl %eax, %edx
+; X86-FAST-NEXT: shrdl %cl, %esi, %edx
+; X86-FAST-NEXT: testl %ecx, %ecx
+; X86-FAST-NEXT: je .LBB2_2
+; X86-FAST-NEXT: # %bb.1:
+; X86-FAST-NEXT: movl %edx, %eax
+; X86-FAST-NEXT: .LBB2_2:
+; X86-FAST-NEXT: popl %esi
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: var_shift_i32:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %edi
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: andl $31, %edx
+; X86-SLOW-NEXT: movl %eax, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: shrl %cl, %edi
+; X86-SLOW-NEXT: movl %edx, %ecx
+; X86-SLOW-NEXT: negl %ecx
+; X86-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X86-SLOW-NEXT: shll %cl, %esi
+; X86-SLOW-NEXT: testl %edx, %edx
+; X86-SLOW-NEXT: je .LBB2_2
+; X86-SLOW-NEXT: # %bb.1:
+; X86-SLOW-NEXT: orl %edi, %esi
+; X86-SLOW-NEXT: movl %esi, %eax
+; X86-SLOW-NEXT: .LBB2_2:
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: popl %edi
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: var_shift_i32:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edx, %ecx
+; X64-FAST-NEXT: andl $31, %ecx
+; X64-FAST-NEXT: movl %esi, %eax
+; X64-FAST-NEXT: shrdl %cl, %edi, %eax
+; X64-FAST-NEXT: testl %ecx, %ecx
+; X64-FAST-NEXT: cmovel %esi, %eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: var_shift_i32:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movl %edi, %eax
+; X64-SLOW-NEXT: andl $31, %edx
+; X64-SLOW-NEXT: movl %esi, %edi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: shrl %cl, %edi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: negl %ecx
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: shll %cl, %eax
+; X64-SLOW-NEXT: orl %edi, %eax
+; X64-SLOW-NEXT: testl %edx, %edx
+; X64-SLOW-NEXT: cmovel %esi, %eax
+; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
ret i32 %tmp
}
@@ -170,15 +255,31 @@ define i64 @var_shift_i64(i64 %x, i64 %y
; X86-NEXT: popl %ebp
; X86-NEXT: retl
;
-; X64-LABEL: var_shift_i64:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdx, %rcx
-; X64-NEXT: andl $63, %ecx
-; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: shrdq %cl, %rdi, %rax
-; X64-NEXT: testq %rcx, %rcx
-; X64-NEXT: cmoveq %rsi, %rax
-; X64-NEXT: retq
+; X64-FAST-LABEL: var_shift_i64:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movq %rdx, %rcx
+; X64-FAST-NEXT: andl $63, %ecx
+; X64-FAST-NEXT: movq %rsi, %rax
+; X64-FAST-NEXT: shrdq %cl, %rdi, %rax
+; X64-FAST-NEXT: testq %rcx, %rcx
+; X64-FAST-NEXT: cmoveq %rsi, %rax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: var_shift_i64:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movq %rdi, %rax
+; X64-SLOW-NEXT: andl $63, %edx
+; X64-SLOW-NEXT: movq %rsi, %rdi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: shrq %cl, %rdi
+; X64-SLOW-NEXT: movl %edx, %ecx
+; X64-SLOW-NEXT: negl %ecx
+; X64-SLOW-NEXT: # kill: def $cl killed $cl killed $ecx
+; X64-SLOW-NEXT: shlq %cl, %rax
+; X64-SLOW-NEXT: orq %rdi, %rax
+; X64-SLOW-NEXT: testq %rdx, %rdx
+; X64-SLOW-NEXT: cmoveq %rsi, %rax
+; X64-SLOW-NEXT: retq
%tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 %z)
ret i64 %tmp
}
@@ -210,55 +311,115 @@ define i8 @const_shift_i8(i8 %x, i8 %y)
}
define i16 @const_shift_i16(i16 %x, i16 %y) nounwind {
-; X86-LABEL: const_shift_i16:
-; X86: # %bb.0:
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shldw $9, %cx, %ax
-; X86-NEXT: retl
-;
-; X64-LABEL: const_shift_i16:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shldw $9, %si, %ax
-; X64-NEXT: # kill: def $ax killed $ax killed $eax
-; X64-NEXT: retq
+; X86-FAST-LABEL: const_shift_i16:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movzwl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: shldw $9, %cx, %ax
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: const_shift_i16:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: shrl $7, %ecx
+; X86-SLOW-NEXT: shll $9, %eax
+; X86-SLOW-NEXT: orl %ecx, %eax
+; X86-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: const_shift_i16:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: shldw $9, %si, %ax
+; X64-FAST-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: const_shift_i16:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: movzwl %si, %eax
+; X64-SLOW-NEXT: shll $9, %edi
+; X64-SLOW-NEXT: shrl $7, %eax
+; X64-SLOW-NEXT: orl %edi, %eax
+; X64-SLOW-NEXT: # kill: def $ax killed $ax killed $eax
+; X64-SLOW-NEXT: retq
%tmp = tail call i16 @llvm.fshr.i16(i16 %x, i16 %y, i16 7)
ret i16 %tmp
}
define i32 @const_shift_i32(i32 %x, i32 %y) nounwind {
-; X86-LABEL: const_shift_i32:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: shldl $25, %ecx, %eax
-; X86-NEXT: retl
-;
-; X64-LABEL: const_shift_i32:
-; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shldl $25, %esi, %eax
-; X64-NEXT: retq
+; X86-FAST-LABEL: const_shift_i32:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: shldl $25, %ecx, %eax
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: const_shift_i32:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: shrl $7, %ecx
+; X86-SLOW-NEXT: shll $25, %eax
+; X86-SLOW-NEXT: orl %ecx, %eax
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: const_shift_i32:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movl %edi, %eax
+; X64-FAST-NEXT: shldl $25, %esi, %eax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: const_shift_i32:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: # kill: def $esi killed $esi def $rsi
+; X64-SLOW-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-SLOW-NEXT: shrl $7, %esi
+; X64-SLOW-NEXT: shll $25, %edi
+; X64-SLOW-NEXT: leal (%rdi,%rsi), %eax
+; X64-SLOW-NEXT: retq
%tmp = tail call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 7)
ret i32 %tmp
}
define i64 @const_shift_i64(i64 %x, i64 %y) nounwind {
-; X86-LABEL: const_shift_i64:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT: shldl $25, %ecx, %edx
-; X86-NEXT: shrdl $7, %ecx, %eax
-; X86-NEXT: retl
-;
-; X64-LABEL: const_shift_i64:
-; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rax
-; X64-NEXT: shldq $57, %rsi, %rax
-; X64-NEXT: retq
+; X86-FAST-LABEL: const_shift_i64:
+; X86-FAST: # %bb.0:
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-FAST-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-FAST-NEXT: shldl $25, %ecx, %edx
+; X86-FAST-NEXT: shrdl $7, %ecx, %eax
+; X86-FAST-NEXT: retl
+;
+; X86-SLOW-LABEL: const_shift_i64:
+; X86-SLOW: # %bb.0:
+; X86-SLOW-NEXT: pushl %esi
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %edx
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLOW-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-SLOW-NEXT: movl %ecx, %esi
+; X86-SLOW-NEXT: shll $25, %esi
+; X86-SLOW-NEXT: shrl $7, %eax
+; X86-SLOW-NEXT: orl %esi, %eax
+; X86-SLOW-NEXT: shrl $7, %ecx
+; X86-SLOW-NEXT: shll $25, %edx
+; X86-SLOW-NEXT: orl %ecx, %edx
+; X86-SLOW-NEXT: popl %esi
+; X86-SLOW-NEXT: retl
+;
+; X64-FAST-LABEL: const_shift_i64:
+; X64-FAST: # %bb.0:
+; X64-FAST-NEXT: movq %rdi, %rax
+; X64-FAST-NEXT: shldq $57, %rsi, %rax
+; X64-FAST-NEXT: retq
+;
+; X64-SLOW-LABEL: const_shift_i64:
+; X64-SLOW: # %bb.0:
+; X64-SLOW-NEXT: shrq $7, %rsi
+; X64-SLOW-NEXT: shlq $57, %rdi
+; X64-SLOW-NEXT: leaq (%rdi,%rsi), %rax
+; X64-SLOW-NEXT: retq
%tmp = tail call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 7)
ret i64 %tmp
}
More information about the llvm-commits mailing list