[llvm] 1430405 - [X86] funnel-shift.ll - add VBMI2 and non-uniform shift amounts test coverage

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri May 24 06:43:19 PDT 2024


Author: Simon Pilgrim
Date: 2024-05-24T14:36:48+01:00
New Revision: 14304055e0d223a6dd224625b8fd128e6f711eb5

URL: https://github.com/llvm/llvm-project/commit/14304055e0d223a6dd224625b8fd128e6f711eb5
DIFF: https://github.com/llvm/llvm-project/commit/14304055e0d223a6dd224625b8fd128e6f711eb5.diff

LOG: [X86] funnel-shift.ll - add VBMI2 and non-uniform shift amounts test coverage

VBMI2 has legal FSHL/FSHR operations, which makes it easier to test non-uniform shift amounts because the funnel shifts won't get expanded.

Added: 
    

Modified: 
    llvm/test/CodeGen/X86/funnel-shift.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/X86/funnel-shift.ll b/llvm/test/CodeGen/X86/funnel-shift.ll
index c6f0662cadd6b..a464d78f9af38 100644
--- a/llvm/test/CodeGen/X86/funnel-shift.ll
+++ b/llvm/test/CodeGen/X86/funnel-shift.ll
@@ -1,6 +1,7 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 ; RUN: llc < %s -mtriple=i686--   -mattr=sse2 | FileCheck %s --check-prefixes=CHECK,X86-SSE2
-; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=avx2 | FileCheck %s --check-prefixes=CHECK,X64-AVX,X64-AVX2
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx512f,+avx512vbmi,+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64-AVX,X64-VBMI2
 
 declare i8 @llvm.fshl.i8(i8, i8, i8)
 declare i16 @llvm.fshl.i16(i16, i16, i16)
@@ -26,13 +27,13 @@ define i32 @fshl_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %esi, %eax
+; X64-AVX-NEXT:    retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %f
 }
@@ -58,13 +59,13 @@ define i64 @fshl_i64(i64 %x, i64 %y, i64 %z) nounwind {
 ; X86-SSE2-NEXT:    popl %edi
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i64:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movq %rdx, %rcx
-; X64-AVX2-NEXT:    movq %rdi, %rax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-AVX2-NEXT:    shldq %cl, %rsi, %rax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i64:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movq %rdx, %rcx
+; X64-AVX-NEXT:    movq %rdi, %rax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX-NEXT:    shldq %cl, %rsi, %rax
+; X64-AVX-NEXT:    retq
   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 %z)
   ret i64 %f
 }
@@ -116,18 +117,18 @@ define i128 @fshl_i128(i128 %x, i128 %y, i128 %z) nounwind {
 ; X86-SSE2-NEXT:    popl %ebp
 ; X86-SSE2-NEXT:    retl $4
 ;
-; X64-AVX2-LABEL: fshl_i128:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    testb $64, %r8b
-; X64-AVX2-NEXT:    cmovneq %rdi, %rsi
-; X64-AVX2-NEXT:    cmoveq %rcx, %rdx
-; X64-AVX2-NEXT:    cmovneq %rcx, %rdi
-; X64-AVX2-NEXT:    movq %rdi, %rax
-; X64-AVX2-NEXT:    movl %r8d, %ecx
-; X64-AVX2-NEXT:    shldq %cl, %rdx, %rax
-; X64-AVX2-NEXT:    shldq %cl, %rdi, %rsi
-; X64-AVX2-NEXT:    movq %rsi, %rdx
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i128:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    testb $64, %r8b
+; X64-AVX-NEXT:    cmovneq %rdi, %rsi
+; X64-AVX-NEXT:    cmoveq %rcx, %rdx
+; X64-AVX-NEXT:    cmovneq %rcx, %rdi
+; X64-AVX-NEXT:    movq %rdi, %rax
+; X64-AVX-NEXT:    movl %r8d, %ecx
+; X64-AVX-NEXT:    shldq %cl, %rdx, %rax
+; X64-AVX-NEXT:    shldq %cl, %rdi, %rsi
+; X64-AVX-NEXT:    movq %rsi, %rdx
+; X64-AVX-NEXT:    retq
   %f = call i128 @llvm.fshl.i128(i128 %x, i128 %y, i128 %z)
   ret i128 %f
 }
@@ -173,21 +174,21 @@ define i37 @fshl_i37(i37 %x, i37 %y, i37 %z) nounwind {
 ; X86-SSE2-NEXT:    popl %ebx
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i37:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movq %rdx, %rcx
-; X64-AVX2-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
-; X64-AVX2-NEXT:    andq %rdx, %rax
-; X64-AVX2-NEXT:    movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
-; X64-AVX2-NEXT:    mulq %rdx
-; X64-AVX2-NEXT:    leal (%rdx,%rdx,8), %eax
-; X64-AVX2-NEXT:    leal (%rdx,%rax,4), %eax
-; X64-AVX2-NEXT:    subl %eax, %ecx
-; X64-AVX2-NEXT:    shlq $27, %rsi
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-AVX2-NEXT:    shldq %cl, %rsi, %rdi
-; X64-AVX2-NEXT:    movq %rdi, %rax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i37:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movq %rdx, %rcx
+; X64-AVX-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
+; X64-AVX-NEXT:    andq %rdx, %rax
+; X64-AVX-NEXT:    movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
+; X64-AVX-NEXT:    mulq %rdx
+; X64-AVX-NEXT:    leal (%rdx,%rdx,8), %eax
+; X64-AVX-NEXT:    leal (%rdx,%rax,4), %eax
+; X64-AVX-NEXT:    subl %eax, %ecx
+; X64-AVX-NEXT:    shlq $27, %rsi
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX-NEXT:    shldq %cl, %rsi, %rdi
+; X64-AVX-NEXT:    movq %rdi, %rax
+; X64-AVX-NEXT:    retq
   %f = call i37 @llvm.fshl.i37(i37 %x, i37 %y, i37 %z)
   ret i37 %f
 }
@@ -214,11 +215,11 @@ define i32 @fshl_i32_const_shift(i32 %x, i32 %y) nounwind {
 ; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_const_shift:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl $9, %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_const_shift:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shldl $9, %esi, %eax
+; X64-AVX-NEXT:    retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
   ret i32 %f
 }
@@ -233,11 +234,11 @@ define i32 @fshl_i32_const_overshift(i32 %x, i32 %y) nounwind {
 ; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_const_overshift:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl $9, %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_const_overshift:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shldl $9, %esi, %eax
+; X64-AVX-NEXT:    retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 41)
   ret i32 %f
 }
@@ -254,11 +255,11 @@ define i64 @fshl_i64_const_overshift(i64 %x, i64 %y) nounwind {
 ; X86-SSE2-NEXT:    shrdl $23, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i64_const_overshift:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movq %rdi, %rax
-; X64-AVX2-NEXT:    shldq $41, %rsi, %rax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i64_const_overshift:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movq %rdi, %rax
+; X64-AVX-NEXT:    shldq $41, %rsi, %rax
+; X64-AVX-NEXT:    retq
   %f = call i64 @llvm.fshl.i64(i64 %x, i64 %y, i64 105)
   ret i64 %f
 }
@@ -287,13 +288,13 @@ define i32 @fshr_i32(i32 %x, i32 %y, i32 %z) nounwind {
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %z)
   ret i32 %f
 }
@@ -340,22 +341,22 @@ define i37 @fshr_i37(i37 %x, i37 %y, i37 %z) nounwind {
 ; X86-SSE2-NEXT:    popl %ebx
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i37:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movq %rdx, %rcx
-; X64-AVX2-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
-; X64-AVX2-NEXT:    andq %rdx, %rax
-; X64-AVX2-NEXT:    movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
-; X64-AVX2-NEXT:    mulq %rdx
-; X64-AVX2-NEXT:    leal (%rdx,%rdx,8), %eax
-; X64-AVX2-NEXT:    leal (%rdx,%rax,4), %eax
-; X64-AVX2-NEXT:    subl %eax, %ecx
-; X64-AVX2-NEXT:    addl $27, %ecx
-; X64-AVX2-NEXT:    shlq $27, %rsi
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $rcx
-; X64-AVX2-NEXT:    shrdq %cl, %rdi, %rsi
-; X64-AVX2-NEXT:    movq %rsi, %rax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i37:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movq %rdx, %rcx
+; X64-AVX-NEXT:    movabsq $137438953471, %rax # imm = 0x1FFFFFFFFF
+; X64-AVX-NEXT:    andq %rdx, %rax
+; X64-AVX-NEXT:    movabsq $498560650640798693, %rdx # imm = 0x6EB3E45306EB3E5
+; X64-AVX-NEXT:    mulq %rdx
+; X64-AVX-NEXT:    leal (%rdx,%rdx,8), %eax
+; X64-AVX-NEXT:    leal (%rdx,%rax,4), %eax
+; X64-AVX-NEXT:    subl %eax, %ecx
+; X64-AVX-NEXT:    addl $27, %ecx
+; X64-AVX-NEXT:    shlq $27, %rsi
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $rcx
+; X64-AVX-NEXT:    shrdq %cl, %rdi, %rsi
+; X64-AVX-NEXT:    movq %rsi, %rax
+; X64-AVX-NEXT:    retq
   %f = call i37 @llvm.fshr.i37(i37 %x, i37 %y, i37 %z)
   ret i37 %f
 }
@@ -382,11 +383,11 @@ define i32 @fshl_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shldl $9, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_demandedbits:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl $9, %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_demandedbits:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shldl $9, %esi, %eax
+; X64-AVX-NEXT:    retq
   %x = or i32 %a0, 2147483648
   %y = or i32 %a1, 1
   %res = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 9)
@@ -401,11 +402,11 @@ define i32 @fshr_i32_demandedbits(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_demandedbits:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl $23, %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_demandedbits:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shldl $23, %esi, %eax
+; X64-AVX-NEXT:    retq
   %x = or i32 %a0, 2147483648
   %y = or i32 %a1, 1
   %res = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
@@ -422,12 +423,12 @@ define i32 @fshl_i32_undef0(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_undef0:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_undef0:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %a1)
   ret i32 %res
 }
@@ -442,13 +443,13 @@ define i32 @fshl_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_undef0_msk:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    andl $7, %ecx
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_undef0_msk:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    andl $7, %ecx
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %m = and i32 %a1, 7
   %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 %m)
   ret i32 %res
@@ -461,15 +462,43 @@ define i32 @fshl_i32_undef0_cst(i32 %a0) nounwind {
 ; X86-SSE2-NEXT:    shrl $23, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_undef0_cst:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shrl $23, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_undef0_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shrl $23, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 undef, i32 %a0, i32 9)
   ret i32 %res
 }
 
+define <4 x i32> @fshl_v4i32_undef0_cst(<4 x i32> %a0) nounwind {
+; X86-SSE2-LABEL: fshl_v4i32_undef0_cst:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT:    psrld $20, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT:    psrld $21, %xmm2
+; X86-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT:    psrld $22, %xmm1
+; X86-SSE2-NEXT:    psrld $23, %xmm0
+; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: fshl_v4i32_undef0_cst:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X64-VBMI2-LABEL: fshl_v4i32_undef0_cst:
+; X64-VBMI2:       # %bb.0:
+; X64-VBMI2-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-VBMI2-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> undef, <4 x i32> %a0, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+  ret <4 x i32> %res
+}
+
 define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshl_i32_undef1:
 ; X86-SSE2:       # %bb.0:
@@ -478,13 +507,13 @@ define i32 @fshl_i32_undef1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shldl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_undef1:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %eax, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_undef1:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %eax, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %a1)
   ret i32 %res
 }
@@ -498,14 +527,14 @@ define i32 @fshl_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shll %cl, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_undef1_msk:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    andb $7, %cl
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shll %cl, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_undef1_msk:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    andb $7, %cl
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shll %cl, %eax
+; X64-AVX-NEXT:    retq
   %m = and i32 %a1, 7
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 %m)
   ret i32 %res
@@ -518,15 +547,34 @@ define i32 @fshl_i32_undef1_cst(i32 %a0) nounwind {
 ; X86-SSE2-NEXT:    shll $9, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_undef1_cst:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shll $9, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_undef1_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shll $9, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 undef, i32 9)
   ret i32 %res
 }
 
+define <4 x i32> @fshl_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
+; X86-SSE2-LABEL: fshl_v4i32_undef1_cst:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX-LABEL: fshl_v4i32_undef1_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+  ret <4 x i32> %res
+}
+
 define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshl_i32_undef2:
 ; X86-SSE2:       # %bb.0:
@@ -535,11 +583,11 @@ define i32 @fshl_i32_undef2(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shldl %cl, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_undef2:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl %cl, %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_undef2:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shldl %cl, %esi, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 undef)
   ret i32 %res
 }
@@ -552,13 +600,13 @@ define i32 @fshr_i32_undef0(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_undef0:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %eax, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_undef0:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %eax, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %a1)
   ret i32 %res
 }
@@ -572,14 +620,14 @@ define i32 @fshr_i32_undef0_msk(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shrl %cl, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_undef0_msk:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    andb $7, %cl
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrl %cl, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_undef0_msk:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    andb $7, %cl
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrl %cl, %eax
+; X64-AVX-NEXT:    retq
   %m = and i32 %a1, 7
   %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 %m)
   ret i32 %res
@@ -592,15 +640,38 @@ define i32 @fshr_i32_undef0_cst(i32 %a0) nounwind {
 ; X86-SSE2-NEXT:    shrl $9, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_undef0_cst:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shrl $9, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_undef0_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shrl $9, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 undef, i32 %a0, i32 9)
   ret i32 %res
 }
 
+define <4 x i32> @fshr_v4i32_undef0_cst(<4 x i32> %a0) nounwind {
+; X86-SSE2-LABEL: fshr_v4i32_undef0_cst:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT:    psrld $12, %xmm1
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm2
+; X86-SSE2-NEXT:    psrld $11, %xmm2
+; X86-SSE2-NEXT:    punpckhqdq {{.*#+}} xmm2 = xmm2[1],xmm1[1]
+; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
+; X86-SSE2-NEXT:    psrld $10, %xmm1
+; X86-SSE2-NEXT:    psrld $9, %xmm0
+; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
+; X86-SSE2-NEXT:    shufps {{.*#+}} xmm0 = xmm0[0,3],xmm2[0,3]
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX-LABEL: fshr_v4i32_undef0_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vpsrlvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> undef, <4 x i32> %a0, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+  ret <4 x i32> %res
+}
+
 define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshr_i32_undef1:
 ; X86-SSE2:       # %bb.0:
@@ -609,12 +680,12 @@ define i32 @fshr_i32_undef1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_undef1:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_undef1:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %a1)
   ret i32 %res
 }
@@ -629,13 +700,13 @@ define i32 @fshr_i32_undef1_msk(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shrdl %cl, %eax, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_undef1_msk:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    andl $7, %ecx
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_undef1_msk:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    andl $7, %ecx
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %m = and i32 %a1, 7
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 %m)
   ret i32 %res
@@ -648,15 +719,39 @@ define i32 @fshr_i32_undef1_cst(i32 %a0) nounwind {
 ; X86-SSE2-NEXT:    shll $23, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_undef1_cst:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shll $23, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_undef1_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shll $23, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 undef, i32 9)
   ret i32 %res
 }
 
+define <4 x i32> @fshr_v4i32_undef1_cst(<4 x i32> %a0) nounwind {
+; X86-SSE2-LABEL: fshr_v4i32_undef1_cst:
+; X86-SSE2:       # %bb.0:
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[1,1,3,3]
+; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X86-SSE2-NEXT:    pmuludq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1
+; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,2,2,3]
+; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
+; X86-SSE2-NEXT:    retl
+;
+; X64-AVX2-LABEL: fshr_v4i32_undef1_cst:
+; X64-AVX2:       # %bb.0:
+; X64-AVX2-NEXT:    vpsllvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X64-VBMI2-LABEL: fshr_v4i32_undef1_cst:
+; X64-VBMI2:       # %bb.0:
+; X64-VBMI2-NEXT:    vpshrdvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-VBMI2-NEXT:    retq
+  %res = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 9, i32 10, i32 11, i32 12>)
+  ret <4 x i32> %res
+}
+
 define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-LABEL: fshr_i32_undef2:
 ; X86-SSE2:       # %bb.0:
@@ -665,11 +760,11 @@ define i32 @fshr_i32_undef2(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shrdl %cl, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_undef2:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_undef2:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 undef)
   ret i32 %res
 }
@@ -685,13 +780,13 @@ define i32 @fshl_i32_zero0(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_zero0:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_zero0:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    xorl %eax, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 %a1)
   ret i32 %res
 }
@@ -703,11 +798,11 @@ define i32 @fshl_i32_zero0_cst(i32 %a0) nounwind {
 ; X86-SSE2-NEXT:    shrl $23, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_zero0_cst:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shrl $23, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_zero0_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shrl $23, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 0, i32 %a0, i32 9)
   ret i32 %res
 }
@@ -721,14 +816,14 @@ define i32 @fshl_i32_zero1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_zero1:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    xorl %edx, %edx
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %edx, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_zero1:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    xorl %edx, %edx
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %edx, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 %a1)
   ret i32 %res
 }
@@ -740,11 +835,11 @@ define i32 @fshl_i32_zero1_cst(i32 %a0) nounwind {
 ; X86-SSE2-NEXT:    shll $9, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_zero1_cst:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shll $9, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_zero1_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shll $9, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 0, i32 9)
   ret i32 %res
 }
@@ -758,14 +853,14 @@ define i32 @fshr_i32_zero0(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_zero0:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    xorl %edx, %edx
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %edx, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_zero0:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    xorl %edx, %edx
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %edx, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 %a1)
   ret i32 %res
 }
@@ -777,11 +872,11 @@ define i32 @fshr_i32_zero0_cst(i32 %a0) nounwind {
 ; X86-SSE2-NEXT:    shrl $9, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_zero0_cst:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shrl $9, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_zero0_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shrl $9, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 0, i32 %a0, i32 9)
   ret i32 %res
 }
@@ -795,13 +890,13 @@ define i32 @fshr_i32_zero1(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_zero1:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %ecx
-; X64-AVX2-NEXT:    xorl %eax, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_zero1:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %ecx
+; X64-AVX-NEXT:    xorl %eax, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 %a1)
   ret i32 %res
 }
@@ -813,11 +908,11 @@ define i32 @fshr_i32_zero1_cst(i32 %a0) nounwind {
 ; X86-SSE2-NEXT:    shll $23, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_zero1_cst:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shll $23, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_zero1_cst:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shll $23, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 0, i32 9)
   ret i32 %res
 }
@@ -830,10 +925,10 @@ define i32 @fshl_i32_zero2(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_zero2:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_zero2:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshl.i32(i32 %a0, i32 %a1, i32 0)
   ret i32 %res
 }
@@ -844,10 +939,10 @@ define i32 @fshr_i32_zero2(i32 %a0, i32 %a1) nounwind {
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_zero2:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_zero2:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    retq
   %res = call i32 @llvm.fshr.i32(i32 %a0, i32 %a1, i32 0)
   ret i32 %res
 }
@@ -862,11 +957,11 @@ define i32 @fshr_i32_const_shift(i32 %x, i32 %y) nounwind {
 ; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_const_shift:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl $23, %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_const_shift:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shldl $23, %esi, %eax
+; X64-AVX-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 9)
   ret i32 %f
 }
@@ -881,11 +976,11 @@ define i32 @fshr_i32_const_overshift(i32 %x, i32 %y) nounwind {
 ; X86-SSE2-NEXT:    shrdl $9, %ecx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_const_overshift:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    shldl $23, %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_const_overshift:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    shldl $23, %esi, %eax
+; X64-AVX-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 41)
   ret i32 %f
 }
@@ -902,11 +997,11 @@ define i64 @fshr_i64_const_overshift(i64 %x, i64 %y) nounwind {
 ; X86-SSE2-NEXT:    shldl $23, %ecx, %edx
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i64_const_overshift:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movq %rdi, %rax
-; X64-AVX2-NEXT:    shldq $23, %rsi, %rax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i64_const_overshift:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movq %rdi, %rax
+; X64-AVX-NEXT:    shldq $23, %rsi, %rax
+; X64-AVX-NEXT:    retq
   %f = call i64 @llvm.fshr.i64(i64 %x, i64 %y, i64 105)
   ret i64 %f
 }
@@ -928,10 +1023,10 @@ define i32 @fshl_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshl_i32_shift_by_bitwidth:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshl_i32_shift_by_bitwidth:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edi, %eax
+; X64-AVX-NEXT:    retq
   %f = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 32)
   ret i32 %f
 }
@@ -942,10 +1037,10 @@ define i32 @fshr_i32_shift_by_bitwidth(i32 %x, i32 %y) nounwind {
 ; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_i32_shift_by_bitwidth:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_i32_shift_by_bitwidth:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    retq
   %f = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 32)
   ret i32 %f
 }
@@ -964,10 +1059,10 @@ define <4 x i32> @fshr_v4i32_shift_by_bitwidth(<4 x i32> %x, <4 x i32> %y) nounw
 ; X86-SSE2-NEXT:    movaps %xmm1, %xmm0
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: fshr_v4i32_shift_by_bitwidth:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    vmovaps %xmm1, %xmm0
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: fshr_v4i32_shift_by_bitwidth:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    vmovaps %xmm1, %xmm0
+; X64-AVX-NEXT:    retq
   %f = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %y, <4 x i32> <i32 32, i32 32, i32 32, i32 32>)
   ret <4 x i32> %f
 }
@@ -996,30 +1091,30 @@ define void @PR45265(i32 %0, ptr nocapture readonly %1) nounwind {
 ; X86-SSE2-NEXT:    shldl $24, %edx, %ecx
 ; X86-SSE2-NEXT:    xorl %eax, %ecx
 ; X86-SSE2-NEXT:    orl %ecx, %edi
-; X86-SSE2-NEXT:    jne .LBB46_1
+; X86-SSE2-NEXT:    jne .LBB50_1
 ; X86-SSE2-NEXT:  # %bb.2:
 ; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    popl %edi
 ; X86-SSE2-NEXT:    jmp _Z3foov # TAILCALL
-; X86-SSE2-NEXT:  .LBB46_1:
+; X86-SSE2-NEXT:  .LBB50_1:
 ; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    popl %edi
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: PR45265:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movslq %edi, %rax
-; X64-AVX2-NEXT:    leaq (%rax,%rax,2), %rcx
-; X64-AVX2-NEXT:    movsbq 10(%rsi,%rcx,4), %rdx
-; X64-AVX2-NEXT:    shlq $16, %rdx
-; X64-AVX2-NEXT:    movzwl 8(%rsi,%rcx,4), %edi
-; X64-AVX2-NEXT:    orq %rdx, %rdi
-; X64-AVX2-NEXT:    movq (%rsi,%rcx,4), %rcx
-; X64-AVX2-NEXT:    shrdq $40, %rdi, %rcx
-; X64-AVX2-NEXT:    cmpq %rax, %rcx
-; X64-AVX2-NEXT:    je _Z3foov # TAILCALL
-; X64-AVX2-NEXT:  # %bb.1:
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: PR45265:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movslq %edi, %rax
+; X64-AVX-NEXT:    leaq (%rax,%rax,2), %rcx
+; X64-AVX-NEXT:    movsbq 10(%rsi,%rcx,4), %rdx
+; X64-AVX-NEXT:    shlq $16, %rdx
+; X64-AVX-NEXT:    movzwl 8(%rsi,%rcx,4), %edi
+; X64-AVX-NEXT:    orq %rdx, %rdi
+; X64-AVX-NEXT:    movq (%rsi,%rcx,4), %rcx
+; X64-AVX-NEXT:    shrdq $40, %rdi, %rcx
+; X64-AVX-NEXT:    cmpq %rax, %rcx
+; X64-AVX-NEXT:    je _Z3foov # TAILCALL
+; X64-AVX-NEXT:  # %bb.1:
+; X64-AVX-NEXT:    retq
   %3 = sext i32 %0 to i64
   %4 = getelementptr inbounds %struct.S, ptr %1, i64 %3
   %5 = bitcast ptr %4 to ptr
@@ -1052,15 +1147,15 @@ define i32 @or_shl_fshl(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_shl_fshl:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shll %cl, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %esi, %edi
-; X64-AVX2-NEXT:    orl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_shl_fshl:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shll %cl, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %esi, %edi
+; X64-AVX-NEXT:    orl %edi, %eax
+; X64-AVX-NEXT:    retq
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
   %or = or i32 %fun, %shy
@@ -1078,15 +1173,15 @@ define i32 @or_shl_rotl(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    orl %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_shl_rotl:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shll %cl, %edi
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    roll %cl, %eax
-; X64-AVX2-NEXT:    orl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_shl_rotl:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shll %cl, %edi
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    roll %cl, %eax
+; X64-AVX-NEXT:    orl %edi, %eax
+; X64-AVX-NEXT:    retq
   %shx = shl i32 %x, %s
   %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
   %or = or i32 %rot, %shx
@@ -1107,15 +1202,15 @@ define i32 @or_shl_fshl_commute(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_shl_fshl_commute:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shll %cl, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %esi, %edi
-; X64-AVX2-NEXT:    orl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_shl_fshl_commute:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shll %cl, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %esi, %edi
+; X64-AVX-NEXT:    orl %edi, %eax
+; X64-AVX-NEXT:    retq
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %x, i32 %y, i32 %s)
   %or = or i32 %shy, %fun
@@ -1133,15 +1228,15 @@ define i32 @or_shl_rotl_commute(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    orl %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_shl_rotl_commute:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shll %cl, %edi
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    roll %cl, %eax
-; X64-AVX2-NEXT:    orl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_shl_rotl_commute:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shll %cl, %edi
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    roll %cl, %eax
+; X64-AVX-NEXT:    orl %edi, %eax
+; X64-AVX-NEXT:    retq
   %shx = shl i32 %x, %s
   %rot = call i32 @llvm.fshl.i32(i32 %y, i32 %y, i32 %s)
   %or = or i32 %shx, %rot
@@ -1162,15 +1257,15 @@ define i32 @or_lshr_fshr(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_lshr_fshr:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shrl %cl, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %esi, %edi
-; X64-AVX2-NEXT:    orl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_lshr_fshr:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shrl %cl, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %esi, %edi
+; X64-AVX-NEXT:    orl %edi, %eax
+; X64-AVX-NEXT:    retq
   %shy = lshr i32 %y, %s
   %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
   %or = or i32 %fun, %shy
@@ -1188,15 +1283,15 @@ define i32 @or_lshr_rotr(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    orl %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_lshr_rotr:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shrl %cl, %edi
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    rorl %cl, %eax
-; X64-AVX2-NEXT:    orl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_lshr_rotr:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shrl %cl, %edi
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    rorl %cl, %eax
+; X64-AVX-NEXT:    orl %edi, %eax
+; X64-AVX-NEXT:    retq
   %shx = lshr i32 %x, %s
   %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
   %or = or i32 %rot, %shx
@@ -1217,15 +1312,15 @@ define i32 @or_lshr_fshr_commute(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    popl %esi
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_lshr_fshr_commute:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shrl %cl, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %esi, %edi
-; X64-AVX2-NEXT:    orl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_lshr_fshr_commute:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shrl %cl, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %esi, %edi
+; X64-AVX-NEXT:    orl %edi, %eax
+; X64-AVX-NEXT:    retq
   %shy = lshr i32 %y, %s
   %fun = call i32 @llvm.fshr.i32(i32 %y, i32 %x, i32 %s)
   %or = or i32 %shy, %fun
@@ -1243,15 +1338,15 @@ define i32 @or_lshr_rotr_commute(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    orl %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_lshr_rotr_commute:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    shrl %cl, %edi
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    rorl %cl, %eax
-; X64-AVX2-NEXT:    orl %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_lshr_rotr_commute:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    shrl %cl, %edi
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    rorl %cl, %eax
+; X64-AVX-NEXT:    orl %edi, %eax
+; X64-AVX-NEXT:    retq
   %shx = lshr i32 %x, %s
   %rot = call i32 @llvm.fshr.i32(i32 %y, i32 %y, i32 %s)
   %or = or i32 %shx, %rot
@@ -1267,13 +1362,13 @@ define i32 @or_shl_fshl_simplify(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    shldl %cl, %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_shl_fshl_simplify:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shldl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_shl_fshl_simplify:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shldl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %shy = shl i32 %y, %s
   %fun = call i32 @llvm.fshl.i32(i32 %y, i32 %x, i32 %s)
   %or = or i32 %fun, %shy
@@ -1289,13 +1384,13 @@ define i32 @or_lshr_fshr_simplify(i32 %x, i32 %y, i32 %s) nounwind {
 ; X86-SSE2-NEXT:    shrdl %cl, %edx, %eax
 ; X86-SSE2-NEXT:    retl
 ;
-; X64-AVX2-LABEL: or_lshr_fshr_simplify:
-; X64-AVX2:       # %bb.0:
-; X64-AVX2-NEXT:    movl %edx, %ecx
-; X64-AVX2-NEXT:    movl %esi, %eax
-; X64-AVX2-NEXT:    # kill: def $cl killed $cl killed $ecx
-; X64-AVX2-NEXT:    shrdl %cl, %edi, %eax
-; X64-AVX2-NEXT:    retq
+; X64-AVX-LABEL: or_lshr_fshr_simplify:
+; X64-AVX:       # %bb.0:
+; X64-AVX-NEXT:    movl %edx, %ecx
+; X64-AVX-NEXT:    movl %esi, %eax
+; X64-AVX-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-AVX-NEXT:    shrdl %cl, %edi, %eax
+; X64-AVX-NEXT:    retq
   %shy = lshr i32 %y, %s
   %fun = call i32 @llvm.fshr.i32(i32 %x, i32 %y, i32 %s)
   %or = or i32 %shy, %fun


        


More information about the llvm-commits mailing list