[llvm] r282688 - [AVX-512] Replicate the pattern from AVX to select VMOVDDUP for (v2f64 (X86VBroadcast f64:)). Add AVX512VL to the command line of an existing AVX2 test that hits this condition.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 28 22:54:43 PDT 2016


Author: ctopper
Date: Thu Sep 29 00:54:43 2016
New Revision: 282688

URL: http://llvm.org/viewvc/llvm-project?rev=282688&view=rev
Log:
[AVX-512] Replicate the pattern from AVX to select VMOVDDUP for (v2f64 (X86VBroadcast f64:)). Add AVX512VL to the command line of an existing AVX2 test that hits this condition.
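
For illustration, a minimal IR sketch (not taken from the commit; the function name @splat_f64_reg is made up here) of the kind of input that reaches the new register-form pattern. Splatting a double that is already in a register lowers to (v2f64 (X86VBroadcast f64:)), which the added pattern selects as a single vmovddup of the source register under AVX512VL, mirroring the existing AVX pattern:

; Hypothetical input: splat a scalar double held in a register.
define <2 x double> @splat_f64_reg(double %x) {
  ; insert the scalar into lane 0, then splat it to both lanes;
  ; DAG combining turns this shuffle into X86ISD::VBROADCAST
  %v = insertelement <2 x double> undef, double %x, i32 0
  %splat = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
  ret <2 x double> %splat
}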

Modified:
    llvm/trunk/lib/Target/X86/X86InstrAVX512.td
    llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll

Modified: llvm/trunk/lib/Target/X86/X86InstrAVX512.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrAVX512.td?rev=282688&r1=282687&r2=282688&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrAVX512.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrAVX512.td Thu Sep 29 00:54:43 2016
@@ -8066,10 +8066,14 @@ multiclass avx512_movddup<bits<8> opc, s
 
 defm VMOVDDUP : avx512_movddup<0x12, "vmovddup", X86Movddup>;
 
+let Predicates = [HasVLX] in {
 def : Pat<(X86Movddup (loadv2f64 addr:$src)),
-          (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
+          (VMOVDDUPZ128rm addr:$src)>;
 def : Pat<(v2f64 (X86VBroadcast (loadf64 addr:$src))),
-          (VMOVDDUPZ128rm addr:$src)>, Requires<[HasAVX512, HasVLX]>;
+          (VMOVDDUPZ128rm addr:$src)>;
+def : Pat<(v2f64 (X86VBroadcast f64:$src)),
+          (VMOVDDUPZ128rr (COPY_TO_REGCLASS FR64X:$src, VR128X))>;
+}
 
 //===----------------------------------------------------------------------===//
 // AVX-512 - Unpack Instructions

Modified: llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll?rev=282688&r1=282687&r2=282688&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx2-vbroadcast.ll Thu Sep 29 00:54:43 2016
@@ -1,6 +1,8 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X32
-; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX2
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx2 | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX2
+; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X32 --check-prefix=X32-AVX512VL
+; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=X64 --check-prefix=X64-AVX512VL
 
 define <16 x i8> @BB16(i8* %ptr) nounwind uwtable readnone ssp {
 ; X32-LABEL: BB16:
@@ -207,22 +209,34 @@ entry:
 }
 
 define <4 x i64> @QQ64(i64* %ptr) nounwind uwtable readnone ssp {
-; X32-LABEL: QQ64:
-; X32:       ## BB#0: ## %entry
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    movl (%eax), %ecx
-; X32-NEXT:    movl 4(%eax), %eax
-; X32-NEXT:    vmovd %ecx, %xmm0
-; X32-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
-; X32-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
-; X32-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
-; X32-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
-; X32-NEXT:    retl
+; X32-AVX2-LABEL: QQ64:
+; X32-AVX2:       ## BB#0: ## %entry
+; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX2-NEXT:    movl (%eax), %ecx
+; X32-AVX2-NEXT:    movl 4(%eax), %eax
+; X32-AVX2-NEXT:    vmovd %ecx, %xmm0
+; X32-AVX2-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-AVX2-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-AVX2-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX2-NEXT:    retl
 ;
 ; X64-LABEL: QQ64:
 ; X64:       ## BB#0: ## %entry
 ; X64-NEXT:    vbroadcastsd (%rdi), %ymm0
 ; X64-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: QQ64:
+; X32-AVX512VL:       ## BB#0: ## %entry
+; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512VL-NEXT:    movl (%eax), %ecx
+; X32-AVX512VL-NEXT:    movl 4(%eax), %eax
+; X32-AVX512VL-NEXT:    vmovd %ecx, %xmm0
+; X32-AVX512VL-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0
+; X32-AVX512VL-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
+; X32-AVX512VL-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
+; X32-AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT:    retl
 entry:
   %q = load i64, i64* %ptr, align 4
   %q0 = insertelement <4 x i64> undef, i64 %q, i32 0
@@ -233,37 +247,69 @@ entry:
 }
 
 define <8 x i16> @broadcast_mem_v4i16_v8i16(<4 x i16>* %ptr) {
-; X32-LABEL: broadcast_mem_v4i16_v8i16:
-; X32:       ## BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
-; X32-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
-; X32-NEXT:    retl
-;
-; X64-LABEL: broadcast_mem_v4i16_v8i16:
-; X64:       ## BB#0:
-; X64-NEXT:    vpbroadcastq (%rdi), %xmm0
-; X64-NEXT:    retq
+; X32-AVX2-LABEL: broadcast_mem_v4i16_v8i16:
+; X32-AVX2:       ## BB#0:
+; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
+; X32-AVX2-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: broadcast_mem_v4i16_v8i16:
+; X64-AVX2:       ## BB#0:
+; X64-AVX2-NEXT:    vpbroadcastq (%rdi), %xmm0
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16:
+; X32-AVX512VL:       ## BB#0:
+; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512VL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X32-AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v8i16:
+; X64-AVX512VL:       ## BB#0:
+; X64-AVX512VL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-AVX512VL-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,0,1,4,5,8,9,12,13]
+; X64-AVX512VL-NEXT:    retq
   %load = load <4 x i16>, <4 x i16>* %ptr
   %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   ret <8 x i16> %shuf
 }
 
 define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
-; X32-LABEL: broadcast_mem_v4i16_v16i16:
-; X32:       ## BB#0:
-; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X32-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X32-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X32-NEXT:    vpbroadcastq %xmm0, %ymm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: broadcast_mem_v4i16_v16i16:
-; X64:       ## BB#0:
-; X64-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
-; X64-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
-; X64-NEXT:    vpbroadcastq %xmm0, %ymm0
-; X64-NEXT:    retq
+; X32-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
+; X32-AVX2:       ## BB#0:
+; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX2-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X32-AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; X32-AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: broadcast_mem_v4i16_v16i16:
+; X64-AVX2:       ## BB#0:
+; X64-AVX2-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
+; X64-AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16:
+; X32-AVX512VL:       ## BB#0:
+; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512VL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X32-AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; X32-AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X32-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X32-AVX512VL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: broadcast_mem_v4i16_v16i16:
+; X64-AVX512VL:       ## BB#0:
+; X64-AVX512VL-NEXT:    vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
+; X64-AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
+; X64-AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
+; X64-AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
+; X64-AVX512VL-NEXT:    vpbroadcastq %xmm0, %ymm0
+; X64-AVX512VL-NEXT:    retq
   %load = load <4 x i16>, <4 x i16>* %ptr
   %shuf = shufflevector <4 x i16> %load, <4 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   ret <16 x i16> %shuf
@@ -598,34 +644,54 @@ entry:
 }
 
 define <8 x i32> @V111(<8 x i32> %in) nounwind uwtable readnone ssp {
-; X32-LABEL: V111:
-; X32:       ## BB#0: ## %entry
-; X32-NEXT:    vpbroadcastd LCPI29_0, %ymm1
-; X32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: V111:
-; X64:       ## BB#0: ## %entry
-; X64-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
-; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
-; X64-NEXT:    retq
+; X32-AVX2-LABEL: V111:
+; X32-AVX2:       ## BB#0: ## %entry
+; X32-AVX2-NEXT:    vpbroadcastd LCPI29_0, %ymm1
+; X32-AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: V111:
+; X64-AVX2:       ## BB#0: ## %entry
+; X64-AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
+; X64-AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: V111:
+; X32-AVX512VL:       ## BB#0: ## %entry
+; X32-AVX512VL-NEXT:    vpaddd LCPI29_0{1to8}, %ymm0, %ymm0
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: V111:
+; X64-AVX512VL:       ## BB#0: ## %entry
+; X64-AVX512VL-NEXT:    vpaddd {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; X64-AVX512VL-NEXT:    retq
 entry:
   %g = add <8 x i32> %in, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
   ret <8 x i32> %g
 }
 
 define <8 x float> @V113(<8 x float> %in) nounwind uwtable readnone ssp {
-; X32-LABEL: V113:
-; X32:       ## BB#0: ## %entry
-; X32-NEXT:    vbroadcastss LCPI30_0, %ymm1
-; X32-NEXT:    vaddps %ymm1, %ymm0, %ymm0
-; X32-NEXT:    retl
-;
-; X64-LABEL: V113:
-; X64:       ## BB#0: ## %entry
-; X64-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
-; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0
-; X64-NEXT:    retq
+; X32-AVX2-LABEL: V113:
+; X32-AVX2:       ## BB#0: ## %entry
+; X32-AVX2-NEXT:    vbroadcastss LCPI30_0, %ymm1
+; X32-AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: V113:
+; X64-AVX2:       ## BB#0: ## %entry
+; X64-AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
+; X64-AVX2-NEXT:    vaddps %ymm1, %ymm0, %ymm0
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: V113:
+; X32-AVX512VL:       ## BB#0: ## %entry
+; X32-AVX512VL-NEXT:    vaddps LCPI30_0{1to8}, %ymm0, %ymm0
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: V113:
+; X64-AVX512VL:       ## BB#0: ## %entry
+; X64-AVX512VL-NEXT:    vaddps {{.*}}(%rip){1to8}, %ymm0, %ymm0
+; X64-AVX512VL-NEXT:    retq
 entry:
   %g = fadd <8 x float> %in, <float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000, float 0xbf80000000000000>
   ret <8 x float> %g
@@ -725,11 +791,16 @@ define <8 x i32> @_inreg0(i32 %scalar) n
 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm0
 ; X32-NEXT:    retl
 ;
-; X64-LABEL: _inreg0:
-; X64:       ## BB#0:
-; X64-NEXT:    vmovd %edi, %xmm0
-; X64-NEXT:    vbroadcastss %xmm0, %ymm0
-; X64-NEXT:    retq
+; X64-AVX2-LABEL: _inreg0:
+; X64-AVX2:       ## BB#0:
+; X64-AVX2-NEXT:    vmovd %edi, %xmm0
+; X64-AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512VL-LABEL: _inreg0:
+; X64-AVX512VL:       ## BB#0:
+; X64-AVX512VL-NEXT:    vpbroadcastd %edi, %ymm0
+; X64-AVX512VL-NEXT:    retq
   %in = insertelement <8 x i32> undef, i32 %scalar, i32 0
   %wide = shufflevector <8 x i32> %in, <8 x i32> undef, <8 x i32> zeroinitializer
   ret <8 x i32> %wide
@@ -1034,10 +1105,56 @@ define <4 x double> @splat_concat4(doubl
 ; load will not create a cycle in the DAG.
 ; Those test cases exercise the latter.
 
-; CHECK-LABEL: isel_crash_16b
-; CHECK: vpbroadcastb {{[^,]+}}, %xmm{{[0-9]+}}
-; CHECK: ret
 define void @isel_crash_16b(i8* %cV_R.addr) {
+; X32-AVX2-LABEL: isel_crash_16b:
+; X32-AVX2:       ## BB#0: ## %eintry
+; X32-AVX2-NEXT:    subl $60, %esp
+; X32-AVX2-NEXT:  Ltmp0:
+; X32-AVX2-NEXT:    .cfi_def_cfa_offset 64
+; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX2-NEXT:    vmovaps %xmm0, (%esp)
+; X32-AVX2-NEXT:    vpbroadcastb (%eax), %xmm1
+; X32-AVX2-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    addl $60, %esp
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: isel_crash_16b:
+; X64-AVX2:       ## BB#0: ## %eintry
+; X64-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    movb (%rdi), %al
+; X64-AVX2-NEXT:    vmovd %eax, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastb %xmm1, %xmm1
+; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: isel_crash_16b:
+; X32-AVX512VL:       ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT:    subl $60, %esp
+; X32-AVX512VL-NEXT:  Ltmp0:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_offset 64
+; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX512VL-NEXT:    vmovaps %xmm0, (%esp)
+; X32-AVX512VL-NEXT:    vpbroadcastb (%eax), %xmm1
+; X32-AVX512VL-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    vmovdqa32 %xmm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    addl $60, %esp
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: isel_crash_16b:
+; X64-AVX512VL:       ## BB#0: ## %eintry
+; X64-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    movb (%rdi), %al
+; X64-AVX512VL-NEXT:    vmovd %eax, %xmm1
+; X64-AVX512VL-NEXT:    vpbroadcastb %xmm1, %xmm1
+; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    vmovdqa32 %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    retq
 eintry:
   %__a.addr.i = alloca <2 x i64>, align 16
   %__b.addr.i = alloca <2 x i64>, align 16
@@ -1053,10 +1170,98 @@ eintry:
   ret void
 }
 
-; CHECK-LABEL: isel_crash_32b
-; CHECK: vpbroadcastb {{[^,]+}}, %ymm{{[0-9]+}}
-; CHECK: ret
 define void @isel_crash_32b(i8* %cV_R.addr) {
+; X32-AVX2-LABEL: isel_crash_32b:
+; X32-AVX2:       ## BB#0: ## %eintry
+; X32-AVX2-NEXT:    pushl %ebp
+; X32-AVX2-NEXT:  Ltmp1:
+; X32-AVX2-NEXT:    .cfi_def_cfa_offset 8
+; X32-AVX2-NEXT:  Ltmp2:
+; X32-AVX2-NEXT:    .cfi_offset %ebp, -8
+; X32-AVX2-NEXT:    movl %esp, %ebp
+; X32-AVX2-NEXT:  Ltmp3:
+; X32-AVX2-NEXT:    .cfi_def_cfa_register %ebp
+; X32-AVX2-NEXT:    andl $-32, %esp
+; X32-AVX2-NEXT:    subl $128, %esp
+; X32-AVX2-NEXT:    movl 8(%ebp), %eax
+; X32-AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX2-NEXT:    vmovaps %ymm0, (%esp)
+; X32-AVX2-NEXT:    vpbroadcastb (%eax), %ymm1
+; X32-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    movl %ebp, %esp
+; X32-AVX2-NEXT:    popl %ebp
+; X32-AVX2-NEXT:    vzeroupper
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: isel_crash_32b:
+; X64-AVX2:       ## BB#0: ## %eintry
+; X64-AVX2-NEXT:    pushq %rbp
+; X64-AVX2-NEXT:  Ltmp0:
+; X64-AVX2-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT:  Ltmp1:
+; X64-AVX2-NEXT:    .cfi_offset %rbp, -16
+; X64-AVX2-NEXT:    movq %rsp, %rbp
+; X64-AVX2-NEXT:  Ltmp2:
+; X64-AVX2-NEXT:    .cfi_def_cfa_register %rbp
+; X64-AVX2-NEXT:    andq $-32, %rsp
+; X64-AVX2-NEXT:    subq $128, %rsp
+; X64-AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
+; X64-AVX2-NEXT:    movb (%rdi), %al
+; X64-AVX2-NEXT:    vmovd %eax, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastb %xmm1, %ymm1
+; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    movq %rbp, %rsp
+; X64-AVX2-NEXT:    popq %rbp
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: isel_crash_32b:
+; X32-AVX512VL:       ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT:    pushl %ebp
+; X32-AVX512VL-NEXT:  Ltmp1:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_offset 8
+; X32-AVX512VL-NEXT:  Ltmp2:
+; X32-AVX512VL-NEXT:    .cfi_offset %ebp, -8
+; X32-AVX512VL-NEXT:    movl %esp, %ebp
+; X32-AVX512VL-NEXT:  Ltmp3:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_register %ebp
+; X32-AVX512VL-NEXT:    andl $-32, %esp
+; X32-AVX512VL-NEXT:    subl $128, %esp
+; X32-AVX512VL-NEXT:    movl 8(%ebp), %eax
+; X32-AVX512VL-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT:    vmovaps %ymm0, (%esp)
+; X32-AVX512VL-NEXT:    vpbroadcastb (%eax), %ymm1
+; X32-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    vmovdqa32 %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    movl %ebp, %esp
+; X32-AVX512VL-NEXT:    popl %ebp
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: isel_crash_32b:
+; X64-AVX512VL:       ## BB#0: ## %eintry
+; X64-AVX512VL-NEXT:    pushq %rbp
+; X64-AVX512VL-NEXT:  Ltmp0:
+; X64-AVX512VL-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX512VL-NEXT:  Ltmp1:
+; X64-AVX512VL-NEXT:    .cfi_offset %rbp, -16
+; X64-AVX512VL-NEXT:    movq %rsp, %rbp
+; X64-AVX512VL-NEXT:  Ltmp2:
+; X64-AVX512VL-NEXT:    .cfi_def_cfa_register %rbp
+; X64-AVX512VL-NEXT:    andq $-32, %rsp
+; X64-AVX512VL-NEXT:    subq $128, %rsp
+; X64-AVX512VL-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT:    vmovaps %ymm0, (%rsp)
+; X64-AVX512VL-NEXT:    movb (%rdi), %al
+; X64-AVX512VL-NEXT:    vmovd %eax, %xmm1
+; X64-AVX512VL-NEXT:    vpbroadcastb %xmm1, %ymm1
+; X64-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    vmovdqa32 %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    movq %rbp, %rsp
+; X64-AVX512VL-NEXT:    popq %rbp
+; X64-AVX512VL-NEXT:    retq
 eintry:
   %__a.addr.i = alloca <4 x i64>, align 16
   %__b.addr.i = alloca <4 x i64>, align 16
@@ -1072,10 +1277,56 @@ eintry:
   ret void
 }
 
-; CHECK-LABEL: isel_crash_8w
-; CHECK: vpbroadcastw {{[^,]+}}, %xmm{{[0-9]+}}
-; CHECK: ret
 define void @isel_crash_8w(i16* %cV_R.addr) {
+; X32-AVX2-LABEL: isel_crash_8w:
+; X32-AVX2:       ## BB#0: ## %entry
+; X32-AVX2-NEXT:    subl $60, %esp
+; X32-AVX2-NEXT:  Ltmp4:
+; X32-AVX2-NEXT:    .cfi_def_cfa_offset 64
+; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX2-NEXT:    vmovaps %xmm0, (%esp)
+; X32-AVX2-NEXT:    vpbroadcastw (%eax), %xmm1
+; X32-AVX2-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    addl $60, %esp
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: isel_crash_8w:
+; X64-AVX2:       ## BB#0: ## %entry
+; X64-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    movw (%rdi), %ax
+; X64-AVX2-NEXT:    vmovd %eax, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastw %xmm1, %xmm1
+; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: isel_crash_8w:
+; X32-AVX512VL:       ## BB#0: ## %entry
+; X32-AVX512VL-NEXT:    subl $60, %esp
+; X32-AVX512VL-NEXT:  Ltmp4:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_offset 64
+; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX512VL-NEXT:    vmovaps %xmm0, (%esp)
+; X32-AVX512VL-NEXT:    vpbroadcastw (%eax), %xmm1
+; X32-AVX512VL-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    vmovdqa32 %xmm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    addl $60, %esp
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: isel_crash_8w:
+; X64-AVX512VL:       ## BB#0: ## %entry
+; X64-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    movw (%rdi), %ax
+; X64-AVX512VL-NEXT:    vmovd %eax, %xmm1
+; X64-AVX512VL-NEXT:    vpbroadcastw %xmm1, %xmm1
+; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    vmovdqa32 %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    retq
 entry:
   %__a.addr.i = alloca <2 x i64>, align 16
   %__b.addr.i = alloca <2 x i64>, align 16
@@ -1091,10 +1342,98 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: isel_crash_16w
-; CHECK: vpbroadcastw {{[^,]+}}, %ymm{{[0-9]+}}
-; CHECK: ret
 define void @isel_crash_16w(i16* %cV_R.addr) {
+; X32-AVX2-LABEL: isel_crash_16w:
+; X32-AVX2:       ## BB#0: ## %eintry
+; X32-AVX2-NEXT:    pushl %ebp
+; X32-AVX2-NEXT:  Ltmp5:
+; X32-AVX2-NEXT:    .cfi_def_cfa_offset 8
+; X32-AVX2-NEXT:  Ltmp6:
+; X32-AVX2-NEXT:    .cfi_offset %ebp, -8
+; X32-AVX2-NEXT:    movl %esp, %ebp
+; X32-AVX2-NEXT:  Ltmp7:
+; X32-AVX2-NEXT:    .cfi_def_cfa_register %ebp
+; X32-AVX2-NEXT:    andl $-32, %esp
+; X32-AVX2-NEXT:    subl $128, %esp
+; X32-AVX2-NEXT:    movl 8(%ebp), %eax
+; X32-AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX2-NEXT:    vmovaps %ymm0, (%esp)
+; X32-AVX2-NEXT:    vpbroadcastw (%eax), %ymm1
+; X32-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    movl %ebp, %esp
+; X32-AVX2-NEXT:    popl %ebp
+; X32-AVX2-NEXT:    vzeroupper
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: isel_crash_16w:
+; X64-AVX2:       ## BB#0: ## %eintry
+; X64-AVX2-NEXT:    pushq %rbp
+; X64-AVX2-NEXT:  Ltmp3:
+; X64-AVX2-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT:  Ltmp4:
+; X64-AVX2-NEXT:    .cfi_offset %rbp, -16
+; X64-AVX2-NEXT:    movq %rsp, %rbp
+; X64-AVX2-NEXT:  Ltmp5:
+; X64-AVX2-NEXT:    .cfi_def_cfa_register %rbp
+; X64-AVX2-NEXT:    andq $-32, %rsp
+; X64-AVX2-NEXT:    subq $128, %rsp
+; X64-AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
+; X64-AVX2-NEXT:    movw (%rdi), %ax
+; X64-AVX2-NEXT:    vmovd %eax, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastw %xmm1, %ymm1
+; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    movq %rbp, %rsp
+; X64-AVX2-NEXT:    popq %rbp
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: isel_crash_16w:
+; X32-AVX512VL:       ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT:    pushl %ebp
+; X32-AVX512VL-NEXT:  Ltmp5:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_offset 8
+; X32-AVX512VL-NEXT:  Ltmp6:
+; X32-AVX512VL-NEXT:    .cfi_offset %ebp, -8
+; X32-AVX512VL-NEXT:    movl %esp, %ebp
+; X32-AVX512VL-NEXT:  Ltmp7:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_register %ebp
+; X32-AVX512VL-NEXT:    andl $-32, %esp
+; X32-AVX512VL-NEXT:    subl $128, %esp
+; X32-AVX512VL-NEXT:    movl 8(%ebp), %eax
+; X32-AVX512VL-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT:    vmovaps %ymm0, (%esp)
+; X32-AVX512VL-NEXT:    vpbroadcastw (%eax), %ymm1
+; X32-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    vmovdqa32 %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    movl %ebp, %esp
+; X32-AVX512VL-NEXT:    popl %ebp
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: isel_crash_16w:
+; X64-AVX512VL:       ## BB#0: ## %eintry
+; X64-AVX512VL-NEXT:    pushq %rbp
+; X64-AVX512VL-NEXT:  Ltmp3:
+; X64-AVX512VL-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX512VL-NEXT:  Ltmp4:
+; X64-AVX512VL-NEXT:    .cfi_offset %rbp, -16
+; X64-AVX512VL-NEXT:    movq %rsp, %rbp
+; X64-AVX512VL-NEXT:  Ltmp5:
+; X64-AVX512VL-NEXT:    .cfi_def_cfa_register %rbp
+; X64-AVX512VL-NEXT:    andq $-32, %rsp
+; X64-AVX512VL-NEXT:    subq $128, %rsp
+; X64-AVX512VL-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT:    vmovaps %ymm0, (%rsp)
+; X64-AVX512VL-NEXT:    movw (%rdi), %ax
+; X64-AVX512VL-NEXT:    vmovd %eax, %xmm1
+; X64-AVX512VL-NEXT:    vpbroadcastw %xmm1, %ymm1
+; X64-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    vmovdqa32 %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    movq %rbp, %rsp
+; X64-AVX512VL-NEXT:    popq %rbp
+; X64-AVX512VL-NEXT:    retq
 eintry:
   %__a.addr.i = alloca <4 x i64>, align 16
   %__b.addr.i = alloca <4 x i64>, align 16
@@ -1110,10 +1449,41 @@ eintry:
   ret void
 }
 
-; CHECK-LABEL: isel_crash_4d
-; CHECK: vbroadcastss {{[^,]+}}, %xmm{{[0-9]+}}
-; CHECK: ret
 define void @isel_crash_4d(i32* %cV_R.addr) {
+; X32-LABEL: isel_crash_4d:
+; X32:       ## BB#0: ## %entry
+; X32-NEXT:    subl $60, %esp
+; X32-NEXT:  Ltmp8:
+; X32-NEXT:    .cfi_def_cfa_offset 64
+; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X32-NEXT:    vmovaps %xmm0, (%esp)
+; X32-NEXT:    vbroadcastss (%eax), %xmm1
+; X32-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
+; X32-NEXT:    vmovaps %xmm1, {{[0-9]+}}(%esp)
+; X32-NEXT:    addl $60, %esp
+; X32-NEXT:    retl
+;
+; X64-AVX2-LABEL: isel_crash_4d:
+; X64-AVX2:       ## BB#0: ## %entry
+; X64-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    movl (%rdi), %eax
+; X64-AVX2-NEXT:    vmovd %eax, %xmm1
+; X64-AVX2-NEXT:    vbroadcastss %xmm1, %xmm1
+; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    retq
+;
+; X64-AVX512VL-LABEL: isel_crash_4d:
+; X64-AVX512VL:       ## BB#0: ## %entry
+; X64-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    movl (%rdi), %eax
+; X64-AVX512VL-NEXT:    vpbroadcastd %eax, %xmm1
+; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    vmovaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    retq
 entry:
   %__a.addr.i = alloca <2 x i64>, align 16
   %__b.addr.i = alloca <2 x i64>, align 16
@@ -1129,10 +1499,97 @@ entry:
   ret void
 }
 
-; CHECK-LABEL: isel_crash_8d
-; CHECK: vbroadcastss {{[^,]+}}, %ymm{{[0-9]+}}
-; CHECK: ret
 define void @isel_crash_8d(i32* %cV_R.addr) {
+; X32-AVX2-LABEL: isel_crash_8d:
+; X32-AVX2:       ## BB#0: ## %eintry
+; X32-AVX2-NEXT:    pushl %ebp
+; X32-AVX2-NEXT:  Ltmp9:
+; X32-AVX2-NEXT:    .cfi_def_cfa_offset 8
+; X32-AVX2-NEXT:  Ltmp10:
+; X32-AVX2-NEXT:    .cfi_offset %ebp, -8
+; X32-AVX2-NEXT:    movl %esp, %ebp
+; X32-AVX2-NEXT:  Ltmp11:
+; X32-AVX2-NEXT:    .cfi_def_cfa_register %ebp
+; X32-AVX2-NEXT:    andl $-32, %esp
+; X32-AVX2-NEXT:    subl $128, %esp
+; X32-AVX2-NEXT:    movl 8(%ebp), %eax
+; X32-AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX2-NEXT:    vmovaps %ymm0, (%esp)
+; X32-AVX2-NEXT:    vbroadcastss (%eax), %ymm1
+; X32-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    movl %ebp, %esp
+; X32-AVX2-NEXT:    popl %ebp
+; X32-AVX2-NEXT:    vzeroupper
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: isel_crash_8d:
+; X64-AVX2:       ## BB#0: ## %eintry
+; X64-AVX2-NEXT:    pushq %rbp
+; X64-AVX2-NEXT:  Ltmp6:
+; X64-AVX2-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT:  Ltmp7:
+; X64-AVX2-NEXT:    .cfi_offset %rbp, -16
+; X64-AVX2-NEXT:    movq %rsp, %rbp
+; X64-AVX2-NEXT:  Ltmp8:
+; X64-AVX2-NEXT:    .cfi_def_cfa_register %rbp
+; X64-AVX2-NEXT:    andq $-32, %rsp
+; X64-AVX2-NEXT:    subq $128, %rsp
+; X64-AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
+; X64-AVX2-NEXT:    movl (%rdi), %eax
+; X64-AVX2-NEXT:    vmovd %eax, %xmm1
+; X64-AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
+; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    movq %rbp, %rsp
+; X64-AVX2-NEXT:    popq %rbp
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: isel_crash_8d:
+; X32-AVX512VL:       ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT:    pushl %ebp
+; X32-AVX512VL-NEXT:  Ltmp9:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_offset 8
+; X32-AVX512VL-NEXT:  Ltmp10:
+; X32-AVX512VL-NEXT:    .cfi_offset %ebp, -8
+; X32-AVX512VL-NEXT:    movl %esp, %ebp
+; X32-AVX512VL-NEXT:  Ltmp11:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_register %ebp
+; X32-AVX512VL-NEXT:    andl $-32, %esp
+; X32-AVX512VL-NEXT:    subl $128, %esp
+; X32-AVX512VL-NEXT:    movl 8(%ebp), %eax
+; X32-AVX512VL-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT:    vmovaps %ymm0, (%esp)
+; X32-AVX512VL-NEXT:    vbroadcastss (%eax), %ymm1
+; X32-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    movl %ebp, %esp
+; X32-AVX512VL-NEXT:    popl %ebp
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: isel_crash_8d:
+; X64-AVX512VL:       ## BB#0: ## %eintry
+; X64-AVX512VL-NEXT:    pushq %rbp
+; X64-AVX512VL-NEXT:  Ltmp6:
+; X64-AVX512VL-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX512VL-NEXT:  Ltmp7:
+; X64-AVX512VL-NEXT:    .cfi_offset %rbp, -16
+; X64-AVX512VL-NEXT:    movq %rsp, %rbp
+; X64-AVX512VL-NEXT:  Ltmp8:
+; X64-AVX512VL-NEXT:    .cfi_def_cfa_register %rbp
+; X64-AVX512VL-NEXT:    andq $-32, %rsp
+; X64-AVX512VL-NEXT:    subq $128, %rsp
+; X64-AVX512VL-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT:    vmovaps %ymm0, (%rsp)
+; X64-AVX512VL-NEXT:    movl (%rdi), %eax
+; X64-AVX512VL-NEXT:    vpbroadcastd %eax, %ymm1
+; X64-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    movq %rbp, %rsp
+; X64-AVX512VL-NEXT:    popq %rbp
+; X64-AVX512VL-NEXT:    retq
 eintry:
   %__a.addr.i = alloca <4 x i64>, align 16
   %__b.addr.i = alloca <4 x i64>, align 16
@@ -1152,6 +1609,64 @@ eintry:
 ; X64: vpbroadcastq {{[^,]+}}, %xmm{{[0-9]+}}
 ; X64: ret
 define void @isel_crash_2q(i64* %cV_R.addr) {
+; X32-AVX2-LABEL: isel_crash_2q:
+; X32-AVX2:       ## BB#0: ## %entry
+; X32-AVX2-NEXT:    subl $60, %esp
+; X32-AVX2-NEXT:  Ltmp12:
+; X32-AVX2-NEXT:    .cfi_def_cfa_offset 64
+; X32-AVX2-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX2-NEXT:    vmovaps %xmm0, (%esp)
+; X32-AVX2-NEXT:    movl (%eax), %ecx
+; X32-AVX2-NEXT:    movl 4(%eax), %eax
+; X32-AVX2-NEXT:    vmovd %ecx, %xmm1
+; X32-AVX2-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X32-AVX2-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
+; X32-AVX2-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
+; X32-AVX2-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    vmovdqa %xmm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    addl $60, %esp
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: isel_crash_2q:
+; X64-AVX2:       ## BB#0: ## %entry
+; X64-AVX2-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    movq (%rdi), %rax
+; X64-AVX2-NEXT:    vmovq %rax, %xmm1
+; X64-AVX2-NEXT:    vpbroadcastq %xmm1, %xmm1
+; X64-AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovdqa %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: isel_crash_2q:
+; X32-AVX512VL:       ## BB#0: ## %entry
+; X32-AVX512VL-NEXT:    subl $60, %esp
+; X32-AVX512VL-NEXT:  Ltmp12:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_offset 64
+; X32-AVX512VL-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X32-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X32-AVX512VL-NEXT:    vmovaps %xmm0, (%esp)
+; X32-AVX512VL-NEXT:    movl (%eax), %ecx
+; X32-AVX512VL-NEXT:    movl 4(%eax), %eax
+; X32-AVX512VL-NEXT:    vmovd %ecx, %xmm1
+; X32-AVX512VL-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X32-AVX512VL-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
+; X32-AVX512VL-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
+; X32-AVX512VL-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    vmovdqa32 %xmm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    addl $60, %esp
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: isel_crash_2q:
+; X64-AVX512VL:       ## BB#0: ## %entry
+; X64-AVX512VL-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    movq (%rdi), %rax
+; X64-AVX512VL-NEXT:    vpbroadcastq %rax, %xmm1
+; X64-AVX512VL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    vmovaps %xmm1, -{{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    retq
 entry:
   %__a.addr.i = alloca <2 x i64>, align 16
   %__b.addr.i = alloca <2 x i64>, align 16
@@ -1170,6 +1685,108 @@ entry:
 ; X64: vbroadcastsd {{[^,]+}}, %ymm{{[0-9]+}}
 ; X64: ret
 define void @isel_crash_4q(i64* %cV_R.addr) {
+; X32-AVX2-LABEL: isel_crash_4q:
+; X32-AVX2:       ## BB#0: ## %eintry
+; X32-AVX2-NEXT:    pushl %ebp
+; X32-AVX2-NEXT:  Ltmp13:
+; X32-AVX2-NEXT:    .cfi_def_cfa_offset 8
+; X32-AVX2-NEXT:  Ltmp14:
+; X32-AVX2-NEXT:    .cfi_offset %ebp, -8
+; X32-AVX2-NEXT:    movl %esp, %ebp
+; X32-AVX2-NEXT:  Ltmp15:
+; X32-AVX2-NEXT:    .cfi_def_cfa_register %ebp
+; X32-AVX2-NEXT:    andl $-32, %esp
+; X32-AVX2-NEXT:    subl $128, %esp
+; X32-AVX2-NEXT:    movl 8(%ebp), %eax
+; X32-AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX2-NEXT:    vmovaps %ymm0, (%esp)
+; X32-AVX2-NEXT:    movl (%eax), %ecx
+; X32-AVX2-NEXT:    movl 4(%eax), %eax
+; X32-AVX2-NEXT:    vmovd %ecx, %xmm1
+; X32-AVX2-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X32-AVX2-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
+; X32-AVX2-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
+; X32-AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
+; X32-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    vmovdqa %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX2-NEXT:    movl %ebp, %esp
+; X32-AVX2-NEXT:    popl %ebp
+; X32-AVX2-NEXT:    vzeroupper
+; X32-AVX2-NEXT:    retl
+;
+; X64-AVX2-LABEL: isel_crash_4q:
+; X64-AVX2:       ## BB#0: ## %eintry
+; X64-AVX2-NEXT:    pushq %rbp
+; X64-AVX2-NEXT:  Ltmp9:
+; X64-AVX2-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX2-NEXT:  Ltmp10:
+; X64-AVX2-NEXT:    .cfi_offset %rbp, -16
+; X64-AVX2-NEXT:    movq %rsp, %rbp
+; X64-AVX2-NEXT:  Ltmp11:
+; X64-AVX2-NEXT:    .cfi_def_cfa_register %rbp
+; X64-AVX2-NEXT:    andq $-32, %rsp
+; X64-AVX2-NEXT:    subq $128, %rsp
+; X64-AVX2-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX2-NEXT:    vmovaps %ymm0, (%rsp)
+; X64-AVX2-NEXT:    movq (%rdi), %rax
+; X64-AVX2-NEXT:    vmovq %rax, %xmm1
+; X64-AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
+; X64-AVX2-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX2-NEXT:    movq %rbp, %rsp
+; X64-AVX2-NEXT:    popq %rbp
+; X64-AVX2-NEXT:    vzeroupper
+; X64-AVX2-NEXT:    retq
+;
+; X32-AVX512VL-LABEL: isel_crash_4q:
+; X32-AVX512VL:       ## BB#0: ## %eintry
+; X32-AVX512VL-NEXT:    pushl %ebp
+; X32-AVX512VL-NEXT:  Ltmp13:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_offset 8
+; X32-AVX512VL-NEXT:  Ltmp14:
+; X32-AVX512VL-NEXT:    .cfi_offset %ebp, -8
+; X32-AVX512VL-NEXT:    movl %esp, %ebp
+; X32-AVX512VL-NEXT:  Ltmp15:
+; X32-AVX512VL-NEXT:    .cfi_def_cfa_register %ebp
+; X32-AVX512VL-NEXT:    andl $-32, %esp
+; X32-AVX512VL-NEXT:    subl $128, %esp
+; X32-AVX512VL-NEXT:    movl 8(%ebp), %eax
+; X32-AVX512VL-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X32-AVX512VL-NEXT:    vmovaps %ymm0, (%esp)
+; X32-AVX512VL-NEXT:    movl (%eax), %ecx
+; X32-AVX512VL-NEXT:    movl 4(%eax), %eax
+; X32-AVX512VL-NEXT:    vmovd %ecx, %xmm1
+; X32-AVX512VL-NEXT:    vpinsrd $1, %eax, %xmm1, %xmm1
+; X32-AVX512VL-NEXT:    vpinsrd $2, %ecx, %xmm1, %xmm1
+; X32-AVX512VL-NEXT:    vpinsrd $3, %eax, %xmm1, %xmm1
+; X32-AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm1, %ymm1
+; X32-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    vmovdqa32 %ymm1, {{[0-9]+}}(%esp)
+; X32-AVX512VL-NEXT:    movl %ebp, %esp
+; X32-AVX512VL-NEXT:    popl %ebp
+; X32-AVX512VL-NEXT:    retl
+;
+; X64-AVX512VL-LABEL: isel_crash_4q:
+; X64-AVX512VL:       ## BB#0: ## %eintry
+; X64-AVX512VL-NEXT:    pushq %rbp
+; X64-AVX512VL-NEXT:  Ltmp9:
+; X64-AVX512VL-NEXT:    .cfi_def_cfa_offset 16
+; X64-AVX512VL-NEXT:  Ltmp10:
+; X64-AVX512VL-NEXT:    .cfi_offset %rbp, -16
+; X64-AVX512VL-NEXT:    movq %rsp, %rbp
+; X64-AVX512VL-NEXT:  Ltmp11:
+; X64-AVX512VL-NEXT:    .cfi_def_cfa_register %rbp
+; X64-AVX512VL-NEXT:    andq $-32, %rsp
+; X64-AVX512VL-NEXT:    subq $128, %rsp
+; X64-AVX512VL-NEXT:    vxorps %ymm0, %ymm0, %ymm0
+; X64-AVX512VL-NEXT:    vmovaps %ymm0, (%rsp)
+; X64-AVX512VL-NEXT:    movq (%rdi), %rax
+; X64-AVX512VL-NEXT:    vpbroadcastq %rax, %ymm1
+; X64-AVX512VL-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp)
+; X64-AVX512VL-NEXT:    movq %rbp, %rsp
+; X64-AVX512VL-NEXT:    popq %rbp
+; X64-AVX512VL-NEXT:    retq
 eintry:
   %__a.addr.i = alloca <4 x i64>, align 16
   %__b.addr.i = alloca <4 x i64>, align 16



