[llvm] r302979 - InstCombine: Move tests that use target intrinsics into subdirectories

Justin Bogner via llvm-commits llvm-commits at lists.llvm.org
Fri May 12 22:39:47 PDT 2017


Author: bogner
Date: Sat May 13 00:39:46 2017
New Revision: 302979

URL: http://llvm.org/viewvc/llvm-project?rev=302979&view=rev
Log:
InstCombine: Move tests that use target intrinsics into subdirectories

Tests with target intrinsics are inherently target specific, so it
doesn't actually make sense to run them if we've excluded their
target.

Added:
    llvm/trunk/test/Transforms/InstCombine/AArch64/
    llvm/trunk/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
    llvm/trunk/test/Transforms/InstCombine/AArch64/lit.local.cfg
    llvm/trunk/test/Transforms/InstCombine/AMDGPU/
    llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll
    llvm/trunk/test/Transforms/InstCombine/AMDGPU/lit.local.cfg
    llvm/trunk/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
    llvm/trunk/test/Transforms/InstCombine/ARM/constant-fold-hang.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/constant-fold-hang.ll
    llvm/trunk/test/Transforms/InstCombine/ARM/lit.local.cfg
    llvm/trunk/test/Transforms/InstCombine/ARM/neon-intrinsics.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/neon-intrinsics.ll
    llvm/trunk/test/Transforms/InstCombine/PowerPC/
    llvm/trunk/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/aligned-altivec.ll
    llvm/trunk/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/aligned-qpx.ll
    llvm/trunk/test/Transforms/InstCombine/PowerPC/lit.local.cfg
    llvm/trunk/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/vsx-unaligned.ll
    llvm/trunk/test/Transforms/InstCombine/X86/
    llvm/trunk/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/X86FsubCmpCombine.ll
    llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/blend_x86.ll
    llvm/trunk/test/Transforms/InstCombine/X86/lit.local.cfg
    llvm/trunk/test/Transforms/InstCombine/X86/pr2645-1.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/pr2645-1.ll
    llvm/trunk/test/Transforms/InstCombine/X86/shufflemask-undef.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-avx2.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-crc32-demanded.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-f16c.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-f16c.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-fma.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-fma.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-insertps.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-masked-memops.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-masked-memops.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-movmsk.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-muldq.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-pack.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-pack.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-sse.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-sse4a.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-vector-shifts.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-vperm2.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-vperm2.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-vpermil.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-vpermil.ll
    llvm/trunk/test/Transforms/InstCombine/X86/x86-xop.ll
      - copied, changed from r302977, llvm/trunk/test/Transforms/InstCombine/x86-xop.ll
Removed:
    llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
    llvm/trunk/test/Transforms/InstCombine/X86FsubCmpCombine.ll
    llvm/trunk/test/Transforms/InstCombine/aligned-altivec.ll
    llvm/trunk/test/Transforms/InstCombine/aligned-qpx.ll
    llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll
    llvm/trunk/test/Transforms/InstCombine/blend_x86.ll
    llvm/trunk/test/Transforms/InstCombine/constant-fold-hang.ll
    llvm/trunk/test/Transforms/InstCombine/neon-intrinsics.ll
    llvm/trunk/test/Transforms/InstCombine/pr2645-1.ll
    llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll
    llvm/trunk/test/Transforms/InstCombine/vsx-unaligned.ll
    llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll
    llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll
    llvm/trunk/test/Transforms/InstCombine/x86-crc32-demanded.ll
    llvm/trunk/test/Transforms/InstCombine/x86-f16c.ll
    llvm/trunk/test/Transforms/InstCombine/x86-fma.ll
    llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll
    llvm/trunk/test/Transforms/InstCombine/x86-masked-memops.ll
    llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll
    llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll
    llvm/trunk/test/Transforms/InstCombine/x86-pack.ll
    llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll
    llvm/trunk/test/Transforms/InstCombine/x86-sse.ll
    llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll
    llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll
    llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll
    llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll
    llvm/trunk/test/Transforms/InstCombine/x86-vperm2.ll
    llvm/trunk/test/Transforms/InstCombine/x86-vpermil.ll
    llvm/trunk/test/Transforms/InstCombine/x86-xop.ll
Modified:
    llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll

Removed: llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll (removed)
@@ -1,135 +0,0 @@
-; RUN: opt -S -instcombine < %s | FileCheck %s
-
-define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
-}
-
-define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @constantMul() nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-}
-
-define <4 x i32> @constantMulS() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-}
-
-define <4 x i32> @constantMulU() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
-}
-
-define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
-  %b = add <4 x i32> zeroinitializer, %a
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  %b = add <4 x i32> %x, %a
-  ret <4 x i32> %b  
-; CHECK: entry:
-; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT: ret <4 x i32> %b
-}
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-
-; ARM64 variants - <rdar://problem/12349617>
-
-define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
-}
-
-define <4 x i32> @mulByOneARM64(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @constantMulARM64() nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-}
-
-define <4 x i32> @constantMulSARM64() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-}
-
-define <4 x i32> @constantMulUARM64() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
-}
-
-define <4 x i32> @complex1ARM64(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
-  %b = add <4 x i32> zeroinitializer, %a
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @complex2ARM64(<4 x i32> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  %b = add <4 x i32> %x, %a
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT: ret <4 x i32> %b
-}
-
-declare <4 x i32> @llvm.aarch64.neon.smull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i32> @llvm.aarch64.neon.umull.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-
-; CHECK: attributes #0 = { nounwind readnone ssp }
-; CHECK: attributes #1 = { nounwind readnone }
-; CHECK: attributes [[NUW]] = { nounwind }

Copied: llvm/trunk/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll?p2=llvm/trunk/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll&p1=llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/2012-04-23-Neon-Intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/AArch64/2012-04-23-Neon-Intrinsics.ll Sat May 13 00:39:46 2017
@@ -1,70 +1,6 @@
 ; RUN: opt -S -instcombine < %s | FileCheck %s
-
-define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
-}
-
-define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @constantMul() nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  ret <4 x i32> %a
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
-}
-
-define <4 x i32> @constantMulS() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
-}
-
-define <4 x i32> @constantMulU() nounwind readnone ssp {
-entry:
-  %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
-}
-
-define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
-  %b = add <4 x i32> zeroinitializer, %a
-  ret <4 x i32> %b
-; CHECK: entry:
-; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
-; CHECK-NEXT: ret <4 x i32> %a
-}
-
-define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
-  %b = add <4 x i32> %x, %a
-  ret <4 x i32> %b  
-; CHECK: entry:
-; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
-; CHECK-NEXT: ret <4 x i32> %b
-}
-
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
-
-; ARM64 variants - <rdar://problem/12349617>
+; ARM64 neon intrinsic variants - <rdar://problem/12349617>
+; REQUIRES: aarch64
 
 define <4 x i32> @mulByZeroARM64(<4 x i16> %x) nounwind readnone ssp {
 entry:

Added: llvm/trunk/test/Transforms/InstCombine/AArch64/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/AArch64/lit.local.cfg?rev=302979&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/AArch64/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/InstCombine/AArch64/lit.local.cfg Sat May 13 00:39:46 2017
@@ -0,0 +1,2 @@
+if not 'AArch64' in config.root.targets:
+    config.unsupported = True

Copied: llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll?p2=llvm/trunk/test/Transforms/InstCombine/AMDGPU/amdgcn-intrinsics.ll&p1=llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Added: llvm/trunk/test/Transforms/InstCombine/AMDGPU/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/AMDGPU/lit.local.cfg?rev=302979&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/AMDGPU/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/InstCombine/AMDGPU/lit.local.cfg Sat May 13 00:39:46 2017
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+    config.unsupported = True

Added: llvm/trunk/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll?rev=302979&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/ARM/2012-04-23-Neon-Intrinsics.ll Sat May 13 00:39:46 2017
@@ -0,0 +1,65 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define <4 x i32> @mulByZero(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+}
+
+define <4 x i32> @mulByOne(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: %a = sext <4 x i16> %x to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @constantMul() nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  ret <4 x i32> %a
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 6, i32 6, i32 6, i32 6>
+}
+
+define <4 x i32> @constantMulS() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>
+}
+
+define <4 x i32> @constantMulU() nounwind readnone ssp {
+entry:
+  %b = tail call <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16> <i16 -1, i16 -1, i16 -1, i16 -1>, <4 x i16> <i16 1, i16 1, i16 1, i16 1>) nounwind
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: ret <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>
+}
+
+define <4 x i32> @complex1(<4 x i16> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) nounwind
+  %b = add <4 x i32> zeroinitializer, %a
+  ret <4 x i32> %b
+; CHECK: entry:
+; CHECK-NEXT: %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 2, i16 2, i16 2, i16 2>, <4 x i16> %x) [[NUW:#[0-9]+]]
+; CHECK-NEXT: ret <4 x i32> %a
+}
+
+define <4 x i32> @complex2(<4 x i32> %x) nounwind readnone ssp {
+entry:
+  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> <i16 3, i16 3, i16 3, i16 3>, <4 x i16> <i16 2, i16 2, i16 2, i16 2>) nounwind
+  %b = add <4 x i32> %x, %a
+  ret <4 x i32> %b  
+; CHECK: entry:
+; CHECK-NEXT: %b = add <4 x i32> %x, <i32 6, i32 6, i32 6, i32 6>
+; CHECK-NEXT: ret <4 x i32> %b
+}
+
+declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) nounwind readnone
+declare <4 x i32> @llvm.arm.neon.vmullu.v4i32(<4 x i16>, <4 x i16>) nounwind readnone

Copied: llvm/trunk/test/Transforms/InstCombine/ARM/constant-fold-hang.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/constant-fold-hang.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/ARM/constant-fold-hang.ll?p2=llvm/trunk/test/Transforms/InstCombine/ARM/constant-fold-hang.ll&p1=llvm/trunk/test/Transforms/InstCombine/constant-fold-hang.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Added: llvm/trunk/test/Transforms/InstCombine/ARM/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/ARM/lit.local.cfg?rev=302979&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/ARM/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/InstCombine/ARM/lit.local.cfg Sat May 13 00:39:46 2017
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+    config.unsupported = True

Copied: llvm/trunk/test/Transforms/InstCombine/ARM/neon-intrinsics.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/neon-intrinsics.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/ARM/neon-intrinsics.ll?p2=llvm/trunk/test/Transforms/InstCombine/ARM/neon-intrinsics.ll&p1=llvm/trunk/test/Transforms/InstCombine/neon-intrinsics.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/aligned-altivec.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll?p2=llvm/trunk/test/Transforms/InstCombine/PowerPC/aligned-altivec.ll&p1=llvm/trunk/test/Transforms/InstCombine/aligned-altivec.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/aligned-qpx.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll?p2=llvm/trunk/test/Transforms/InstCombine/PowerPC/aligned-qpx.ll&p1=llvm/trunk/test/Transforms/InstCombine/aligned-qpx.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Added: llvm/trunk/test/Transforms/InstCombine/PowerPC/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/PowerPC/lit.local.cfg?rev=302979&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/PowerPC/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/InstCombine/PowerPC/lit.local.cfg Sat May 13 00:39:46 2017
@@ -0,0 +1,3 @@
+if not 'PowerPC' in config.root.targets:
+    config.unsupported = True
+

Copied: llvm/trunk/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/vsx-unaligned.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll?p2=llvm/trunk/test/Transforms/InstCombine/PowerPC/vsx-unaligned.ll&p1=llvm/trunk/test/Transforms/InstCombine/vsx-unaligned.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/X86FsubCmpCombine.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/X86FsubCmpCombine.ll&p1=llvm/trunk/test/Transforms/InstCombine/X86FsubCmpCombine.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/blend_x86.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/blend_x86.ll&p1=llvm/trunk/test/Transforms/InstCombine/blend_x86.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Added: llvm/trunk/test/Transforms/InstCombine/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/lit.local.cfg?rev=302979&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/InstCombine/X86/lit.local.cfg Sat May 13 00:39:46 2017
@@ -0,0 +1,2 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True

Copied: llvm/trunk/test/Transforms/InstCombine/X86/pr2645-1.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/pr2645-1.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/pr2645-1.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/pr2645-1.ll&p1=llvm/trunk/test/Transforms/InstCombine/pr2645-1.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/shufflemask-undef.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/shufflemask-undef.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/shufflemask-undef.ll&p1=llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/shufflemask-undef.ll Sat May 13 00:39:46 2017
@@ -1,4 +1,5 @@
-; RUN: opt < %s -instcombine -S | not grep "shufflevector.*i32 8"
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-NOT: shufflevector{{.*}}i32 8
 
 target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
 target triple = "i386-apple-darwin9"

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-avx2.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-avx2.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-avx2.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-avx512.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-crc32-demanded.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-crc32-demanded.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-crc32-demanded.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-f16c.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-f16c.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-f16c.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-f16c.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-f16c.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-fma.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-fma.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-fma.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-fma.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-fma.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-insertps.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-insertps.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-insertps.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-masked-memops.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-masked-memops.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-masked-memops.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-masked-memops.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-masked-memops.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-movmsk.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-movmsk.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-movmsk.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-muldq.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-muldq.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-muldq.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-pack.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-pack.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-pack.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-pack.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-pack.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-pshufb.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-sse.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-sse.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-sse.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-sse2.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-sse41.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-sse4a.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-sse4a.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-sse4a.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll&p1=llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86/x86-vec_demanded_elts.ll Sat May 13 00:39:46 2017
@@ -1,4 +1,3 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
@@ -26,22 +25,6 @@ define i16 @test1(float %f) {
   ret i16 %tmp69
 }
 
-define i32 @test2(float %f) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[TMP5:%.*]] = fmul float %f, %f
-; CHECK-NEXT:    [[TMP21:%.*]] = bitcast float [[TMP5]] to i32
-; CHECK-NEXT:    ret i32 [[TMP21]]
-;
-  %tmp5 = fmul float %f, %f
-  %tmp9 = insertelement <4 x float> undef, float %tmp5, i32 0
-  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
-  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
-  %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
-  %tmp19 = bitcast <4 x float> %tmp12 to <4 x i32>
-  %tmp21 = extractelement <4 x i32> %tmp19, i32 0
-  ret i32 %tmp21
-}
-
 define i64 @test3(float %f, double %d) {
 ; CHECK-LABEL: @test3(
 ; CHECK-NEXT:    [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
@@ -113,49 +96,6 @@ define i64 @test3(float %f, double %d) {
   ret i64 %tmp15
 }
 
-define void @get_image() nounwind {
-; CHECK-LABEL: @get_image(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @fgetc(i8* null) #0
-; CHECK-NEXT:    br i1 false, label %bb2, label %bb3
-; CHECK:       bb2:
-; CHECK-NEXT:    br label %bb3
-; CHECK:       bb3:
-; CHECK-NEXT:    unreachable
-;
-entry:
-  %0 = call i32 @fgetc(i8* null) nounwind               ; <i32> [#uses=1]
-  %1 = trunc i32 %0 to i8         ; <i8> [#uses=1]
-  %tmp2 = insertelement <100 x i8> zeroinitializer, i8 %1, i32 1          ; <<100 x i8>> [#uses=1]
-  %tmp1 = extractelement <100 x i8> %tmp2, i32 0          ; <i8> [#uses=1]
-  %2 = icmp eq i8 %tmp1, 80               ; <i1> [#uses=1]
-  br i1 %2, label %bb2, label %bb3
-
-bb2:            ; preds = %entry
-  br label %bb3
-
-bb3:            ; preds = %bb2, %entry
-  unreachable
-}
-
-; PR4340
-define void @vac(<4 x float>* nocapture %a) nounwind {
-; CHECK-LABEL: @vac(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    store <4 x float> zeroinitializer, <4 x float>* %a, align 16
-; CHECK-NEXT:    ret void
-;
-entry:
-  %tmp1 = load <4 x float>, <4 x float>* %a		; <<4 x float>> [#uses=1]
-  %vecins = insertelement <4 x float> %tmp1, float 0.000000e+00, i32 0	; <<4 x float>> [#uses=1]
-  %vecins4 = insertelement <4 x float> %vecins, float 0.000000e+00, i32 1; <<4 x float>> [#uses=1]
-  %vecins6 = insertelement <4 x float> %vecins4, float 0.000000e+00, i32 2; <<4 x float>> [#uses=1]
-  %vecins8 = insertelement <4 x float> %vecins6, float 0.000000e+00, i32 3; <<4 x float>> [#uses=1]
-  store <4 x float> %vecins8, <4 x float>* %a
-  ret void
-}
-
-declare i32 @fgetc(i8*)
 declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
 declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
 declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
@@ -168,84 +108,3 @@ declare i32 @llvm.x86.sse2.cvtsd2si(<2 x
 declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
 declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
 declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
-
-define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
-; CHECK-LABEL: @dead_shuffle_elt(
-; CHECK-NEXT:    [[SHUFFLE_I:%.*]] = shufflevector <2 x float> %y, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
-; CHECK-NEXT:    [[SHUFFLE9_I:%.*]] = shufflevector <4 x float> %x, <4 x float> [[SHUFFLE_I]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[SHUFFLE9_I]]
-;
-  %shuffle.i = shufflevector <2 x float> %y, <2 x float> %y, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %shuffle9.i = shufflevector <4 x float> %x, <4 x float> %shuffle.i, <4 x i32> <i32 4, i32 5, i32 2, i32 3>
-  ret <4 x float> %shuffle9.i
-}
-
-define <2 x float> @test_fptrunc(double %f) {
-; CHECK-LABEL: @test_fptrunc(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> <double undef, double 0.000000e+00>, double %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = fptrunc <2 x double> [[TMP1]] to <2 x float>
-; CHECK-NEXT:    ret <2 x float> [[TMP2]]
-;
-  %tmp9 = insertelement <4 x double> undef, double %f, i32 0
-  %tmp10 = insertelement <4 x double> %tmp9, double 0.000000e+00, i32 1
-  %tmp11 = insertelement <4 x double> %tmp10, double 0.000000e+00, i32 2
-  %tmp12 = insertelement <4 x double> %tmp11, double 0.000000e+00, i32 3
-  %tmp5 = fptrunc <4 x double> %tmp12 to <4 x float>
-  %ret = shufflevector <4 x float> %tmp5, <4 x float> undef, <2 x i32> <i32 0, i32 1>
-  ret <2 x float> %ret
-}
-
-define <2 x double> @test_fpext(float %f) {
-; CHECK-LABEL: @test_fpext(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x float> <float undef, float 0.000000e+00>, float %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = fpext <2 x float> [[TMP1]] to <2 x double>
-; CHECK-NEXT:    ret <2 x double> [[TMP2]]
-;
-  %tmp9 = insertelement <4 x float> undef, float %f, i32 0
-  %tmp10 = insertelement <4 x float> %tmp9, float 0.000000e+00, i32 1
-  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
-  %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
-  %tmp5 = fpext <4 x float> %tmp12 to <4 x double>
-  %ret = shufflevector <4 x double> %tmp5, <4 x double> undef, <2 x i32> <i32 0, i32 1>
-  ret <2 x double> %ret
-}
-
-define <4 x double> @test_shuffle(<4 x double> %f) {
-; CHECK-LABEL: @test_shuffle(
-; CHECK-NEXT:    [[RET1:%.*]] = insertelement <4 x double> %f, double 1.000000e+00, i32 3
-; CHECK-NEXT:    ret <4 x double> [[RET1]]
-;
-  %ret = shufflevector <4 x double> %f, <4 x double> <double undef, double 1.0, double undef, double undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 5>
-  ret <4 x double> %ret
-}
-
-define <4 x float> @test_select(float %f, float %g) {
-; CHECK-LABEL: @test_select(
-; CHECK-NEXT:    [[A3:%.*]] = insertelement <4 x float> <float undef, float undef, float undef, float 3.000000e+00>, float %f, i32 0
-; CHECK-NEXT:    [[RET:%.*]] = shufflevector <4 x float> [[A3]], <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[RET]]
-;
-  %a0 = insertelement <4 x float> undef, float %f, i32 0
-  %a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
-  %a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
-  %a3 = insertelement <4 x float> %a2, float 3.000000e+00, i32 3
-  %b0 = insertelement <4 x float> undef, float %g, i32 0
-  %b1 = insertelement <4 x float> %b0, float 4.000000e+00, i32 1
-  %b2 = insertelement <4 x float> %b1, float 5.000000e+00, i32 2
-  %b3 = insertelement <4 x float> %b2, float 6.000000e+00, i32 3
-  %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> %b3
-  ret <4 x float> %ret
-}
-
-; Check that instcombine doesn't wrongly fold away the select completely.
-; TODO: Should this be an insertelement rather than a shuffle?
-
-define <2 x i64> @PR24922(<2 x i64> %v) {
-; CHECK-LABEL: @PR24922(
-; CHECK-NEXT:    [[RESULT1:%.*]] = insertelement <2 x i64> %v, i64 0, i32 0
-; CHECK-NEXT:    ret <2 x i64> [[RESULT1]]
-;
-  %result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
-  ret <2 x i64> %result
-}
-

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-vector-shifts.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-vector-shifts.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-vector-shifts.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-vperm2.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-vperm2.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-vperm2.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-vperm2.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-vperm2.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-vpermil.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-vpermil.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-vpermil.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-vpermil.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-vpermil.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Copied: llvm/trunk/test/Transforms/InstCombine/X86/x86-xop.ll (from r302977, llvm/trunk/test/Transforms/InstCombine/x86-xop.ll)
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86/x86-xop.ll?p2=llvm/trunk/test/Transforms/InstCombine/X86/x86-xop.ll&p1=llvm/trunk/test/Transforms/InstCombine/x86-xop.ll&r1=302977&r2=302979&rev=302979&view=diff
==============================================================================
    (empty)

Removed: llvm/trunk/test/Transforms/InstCombine/X86FsubCmpCombine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/X86FsubCmpCombine.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/X86FsubCmpCombine.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/X86FsubCmpCombine.ll (removed)
@@ -1,181 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; The test checks the folding of cmp(sub(a,b),0) into cmp(a,b).
-
-define i8 @sub_compare_foldingPD128_safe(<2 x double> %a, <2 x double> %b){
-; CHECK-LABEL: @sub_compare_foldingPD128_safe(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[SUB_SAFE:%.*]] = fsub <2 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[SUB_SAFE]], <2 x double> zeroinitializer, i32 5, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.safe = fsub <2 x double> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.safe , <2 x double> zeroinitializer, i32 5, i8 -1)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_foldingPD128(<2 x double> %a, <2 x double> %b){
-; CHECK-LABEL: @sub_compare_foldingPD128(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 5, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i = fsub ninf <2 x double> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %sub.i , <2 x double> zeroinitializer, i32 5, i8 -1)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_foldingPD256(<4 x double> %a, <4 x double> %b){
-; CHECK-LABEL: @sub_compare_foldingPD256(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[A:%.*]], <4 x double> [[B:%.*]], i32 5, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i1 = fsub ninf <4 x double> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %sub.i1, <4 x double> zeroinitializer, i32 5, i8 -1)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_foldingPD512(<8 x double> %a, <8 x double> %b){
-; CHECK-LABEL: @sub_compare_foldingPD512(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 11, i8 -1, i32 4)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i2 = fsub ninf <8 x double> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %sub.i2, <8 x double> zeroinitializer, i32 11, i8 -1, i32 4)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_foldingPS128(<4 x float> %a, <4 x float> %b){
-; CHECK-LABEL: @sub_compare_foldingPS128(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 12, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i3 = fsub ninf <4 x float> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %sub.i3, <4 x float> zeroinitializer, i32 12, i8 -1)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_foldingPS256(<8 x float> %a, <8 x float> %b){
-; CHECK-LABEL: @sub_compare_foldingPS256(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[A:%.*]], <8 x float> [[B:%.*]], i32 5, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i4 = fsub ninf <8 x float> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %sub.i4, <8 x float> zeroinitializer, i32 5, i8 -1)
-  ret i8 %0
-}
-
-
-define i16 @sub_compare_foldingPS512(<16 x float> %a, <16 x float> %b){
-; CHECK-LABEL: @sub_compare_foldingPS512(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 11, i16 -1, i32 4)
-; CHECK-NEXT:    ret i16 [[TMP0]]
-;
-entry:
-  %sub.i5 = fsub ninf <16 x float> %a, %b
-  %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %sub.i5, <16 x float> zeroinitializer, i32 11, i16 -1, i32 4)
-  ret i16 %0
-}
-
-
-
-define i8 @sub_compare_folding_swapPD128(<2 x double> %a, <2 x double> %b){
-; CHECK-LABEL: @sub_compare_folding_swapPD128(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> [[B:%.*]], <2 x double> [[A:%.*]], i32 5, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i = fsub ninf <2 x double> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> zeroinitializer, <2 x double> %sub.i, i32 5, i8 -1)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_folding_swapPD256(<4 x double> %a, <4 x double> %b){
-; CHECK-LABEL: @sub_compare_folding_swapPD256(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> [[B:%.*]], <4 x double> [[A:%.*]], i32 5, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i = fsub ninf <4 x double> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> zeroinitializer, <4 x double> %sub.i, i32 5, i8 -1)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_folding_swapPD512(<8 x double> %a, <8 x double> %b){
-; CHECK-LABEL: @sub_compare_folding_swapPD512(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> [[B:%.*]], <8 x double> [[A:%.*]], i32 11, i8 -1, i32 4)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i = fsub ninf <8 x double> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> zeroinitializer, <8 x double> %sub.i, i32 11, i8 -1, i32 4)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_folding_swapPS128(<4 x float> %a, <4 x float> %b){
-; CHECK-LABEL: @sub_compare_folding_swapPS128(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> [[B:%.*]], <4 x float> [[A:%.*]], i32 12, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i = fsub ninf <4 x float> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> zeroinitializer, <4 x float> %sub.i, i32 12, i8 -1)
-  ret i8 %0
-}
-
-
-define i8 @sub_compare_folding_swapPS256(<8 x float> %a, <8 x float> %b){
-; CHECK-LABEL: @sub_compare_folding_swapPS256(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> [[B:%.*]], <8 x float> [[A:%.*]], i32 5, i8 -1)
-; CHECK-NEXT:    ret i8 [[TMP0]]
-;
-entry:
-  %sub.i = fsub ninf <8 x float> %a, %b
-  %0 = tail call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> zeroinitializer, <8 x float> %sub.i, i32 5, i8 -1)
-  ret i8 %0
-}
-
-
-define i16 @sub_compare_folding_swapPS512(<16 x float> %a, <16 x float> %b){
-; CHECK-LABEL: @sub_compare_folding_swapPS512(
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> [[B:%.*]], <16 x float> [[A:%.*]], i32 11, i16 -1, i32 4)
-; CHECK-NEXT:    ret i16 [[TMP0]]
-;
-entry:
-  %sub.i = fsub ninf <16 x float> %a, %b
-  %0 = tail call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> zeroinitializer, <16 x float> %sub.i, i32 11, i16 -1, i32 4)
-  ret i16 %0
-}
-
-declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, i8)
-declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, i8)
-declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double>, <8 x double>, i32, i8, i32)
-declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8)
-declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8)
-declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float>, <16 x float>, i32, i16, i32)

Removed: llvm/trunk/test/Transforms/InstCombine/aligned-altivec.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/aligned-altivec.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/aligned-altivec.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/aligned-altivec.ll (removed)
@@ -1,131 +0,0 @@
-; RUN: opt -S -instcombine < %s | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
-declare <4 x i32> @llvm.ppc.altivec.lvx(i8*) #1
-
-define <4 x i32> @test1(<4 x i32>* %h) #0 {
-entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
-
-; CHECK-LABEL: @test1
-; CHECK: @llvm.ppc.altivec.lvx
-; CHECK: ret <4 x i32>
-
-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
-  %a = add <4 x i32> %v0, %vl
-  ret <4 x i32> %a
-}
-
-define <4 x i32> @test1a(<4 x i32>* align 16 %h) #0 {
-entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  %vl = call <4 x i32> @llvm.ppc.altivec.lvx(i8* %hv)
-
-; CHECK-LABEL: @test1a
-; CHECK-NOT: @llvm.ppc.altivec.lvx
-; CHECK: ret <4 x i32>
-
-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
-  %a = add <4 x i32> %v0, %vl
-  ret <4 x i32> %a
-}
-
-declare void @llvm.ppc.altivec.stvx(<4 x i32>, i8*) #0
-
-define <4 x i32> @test2(<4 x i32>* %h, <4 x i32> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
-
-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
-  ret <4 x i32> %v0
-
-; CHECK-LABEL: @test2
-; CHECK: @llvm.ppc.altivec.stvx
-; CHECK: ret <4 x i32>
-}
-
-define <4 x i32> @test2a(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  call void @llvm.ppc.altivec.stvx(<4 x i32> %d, i8* %hv)
-
-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
-  ret <4 x i32> %v0
-
-; CHECK-LABEL: @test2
-; CHECK-NOT: @llvm.ppc.altivec.stvx
-; CHECK: ret <4 x i32>
-}
-
-declare <4 x i32> @llvm.ppc.altivec.lvxl(i8*) #1
-
-define <4 x i32> @test1l(<4 x i32>* %h) #0 {
-entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
-
-; CHECK-LABEL: @test1l
-; CHECK: @llvm.ppc.altivec.lvxl
-; CHECK: ret <4 x i32>
-
-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
-  %a = add <4 x i32> %v0, %vl
-  ret <4 x i32> %a
-}
-
-define <4 x i32> @test1la(<4 x i32>* align 16 %h) #0 {
-entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  %vl = call <4 x i32> @llvm.ppc.altivec.lvxl(i8* %hv)
-
-; CHECK-LABEL: @test1la
-; CHECK-NOT: @llvm.ppc.altivec.lvxl
-; CHECK: ret <4 x i32>
-
-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
-  %a = add <4 x i32> %v0, %vl
-  ret <4 x i32> %a
-}
-
-declare void @llvm.ppc.altivec.stvxl(<4 x i32>, i8*) #0
-
-define <4 x i32> @test2l(<4 x i32>* %h, <4 x i32> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
-
-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
-  ret <4 x i32> %v0
-
-; CHECK-LABEL: @test2l
-; CHECK: @llvm.ppc.altivec.stvxl
-; CHECK: ret <4 x i32>
-}
-
-define <4 x i32> @test2la(<4 x i32>* align 16 %h, <4 x i32> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x i32>, <4 x i32>* %h, i64 1
-  %hv = bitcast <4 x i32>* %h1 to i8*
-  call void @llvm.ppc.altivec.stvxl(<4 x i32> %d, i8* %hv)
-
-  %v0 = load <4 x i32>, <4 x i32>* %h, align 8
-  ret <4 x i32> %v0
-
-; CHECK-LABEL: @test2l
-; CHECK-NOT: @llvm.ppc.altivec.stvxl
-; CHECK: ret <4 x i32>
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-

Removed: llvm/trunk/test/Transforms/InstCombine/aligned-qpx.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/aligned-qpx.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/aligned-qpx.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/aligned-qpx.ll (removed)
@@ -1,165 +0,0 @@
-; RUN: opt -S -instcombine < %s | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
-target triple = "powerpc64-unknown-linux-gnu"
-
-declare <4 x double> @llvm.ppc.qpx.qvlfs(i8*) #1
-
-define <4 x double> @test1(<4 x float>* %h) #0 {
-entry:
-  %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
-  %hv = bitcast <4 x float>* %h1 to i8*
-  %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
-
-; CHECK-LABEL: @test1
-; CHECK: @llvm.ppc.qpx.qvlfs
-; CHECK: ret <4 x double>
-
-  %v0 = load <4 x float>, <4 x float>* %h, align 8
-  %v0e = fpext <4 x float> %v0 to <4 x double>
-  %a = fadd <4 x double> %v0e, %vl
-  ret <4 x double> %a
-}
-
-define <4 x double> @test1a(<4 x float>* align 16 %h) #0 {
-entry:
-  %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
-  %hv = bitcast <4 x float>* %h1 to i8*
-  %vl = call <4 x double> @llvm.ppc.qpx.qvlfs(i8* %hv)
-
-; CHECK-LABEL: @test1a
-; CHECK-NOT: @llvm.ppc.qpx.qvlfs
-; CHECK-NOT: load <4 x double>
-; CHECK: ret <4 x double>
-
-  %v0 = load <4 x float>, <4 x float>* %h, align 8
-  %v0e = fpext <4 x float> %v0 to <4 x double>
-  %a = fadd <4 x double> %v0e, %vl
-  ret <4 x double> %a
-}
-
-declare void @llvm.ppc.qpx.qvstfs(<4 x double>, i8*) #0
-
-define <4 x float> @test2(<4 x float>* %h, <4 x double> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
-  %hv = bitcast <4 x float>* %h1 to i8*
-  call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
-
-  %v0 = load <4 x float>, <4 x float>* %h, align 8
-  ret <4 x float> %v0
-
-; CHECK-LABEL: @test2
-; CHECK: @llvm.ppc.qpx.qvstfs
-; CHECK: ret <4 x float>
-}
-
-define <4 x float> @test2a(<4 x float>* align 16 %h, <4 x double> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x float>, <4 x float>* %h, i64 1
-  %hv = bitcast <4 x float>* %h1 to i8*
-  call void @llvm.ppc.qpx.qvstfs(<4 x double> %d, i8* %hv)
-
-  %v0 = load <4 x float>, <4 x float>* %h, align 8
-  ret <4 x float> %v0
-
-; CHECK-LABEL: @test2a
-; CHECK: fptrunc <4 x double> %d to <4 x float>
-; CHECK-NOT: @llvm.ppc.qpx.qvstfs
-; CHECK-NOT: store <4 x double>
-; CHECK: ret <4 x float>
-}
-
-declare <4 x double> @llvm.ppc.qpx.qvlfd(i8*) #1
-
-define <4 x double> @test1l(<4 x double>* %h) #0 {
-entry:
-  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
-  %hv = bitcast <4 x double>* %h1 to i8*
-  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
-
-; CHECK-LABEL: @test1l
-; CHECK: @llvm.ppc.qpx.qvlfd
-; CHECK: ret <4 x double>
-
-  %v0 = load <4 x double>, <4 x double>* %h, align 8
-  %a = fadd <4 x double> %v0, %vl
-  ret <4 x double> %a
-}
-
-define <4 x double> @test1ln(<4 x double>* align 16 %h) #0 {
-entry:
-  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
-  %hv = bitcast <4 x double>* %h1 to i8*
-  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
-
-; CHECK-LABEL: @test1ln
-; CHECK: @llvm.ppc.qpx.qvlfd
-; CHECK: ret <4 x double>
-
-  %v0 = load <4 x double>, <4 x double>* %h, align 8
-  %a = fadd <4 x double> %v0, %vl
-  ret <4 x double> %a
-}
-
-define <4 x double> @test1la(<4 x double>* align 32 %h) #0 {
-entry:
-  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
-  %hv = bitcast <4 x double>* %h1 to i8*
-  %vl = call <4 x double> @llvm.ppc.qpx.qvlfd(i8* %hv)
-
-; CHECK-LABEL: @test1la
-; CHECK-NOT: @llvm.ppc.qpx.qvlfd
-; CHECK: ret <4 x double>
-
-  %v0 = load <4 x double>, <4 x double>* %h, align 8
-  %a = fadd <4 x double> %v0, %vl
-  ret <4 x double> %a
-}
-
-declare void @llvm.ppc.qpx.qvstfd(<4 x double>, i8*) #0
-
-define <4 x double> @test2l(<4 x double>* %h, <4 x double> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
-  %hv = bitcast <4 x double>* %h1 to i8*
-  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
-
-  %v0 = load <4 x double>, <4 x double>* %h, align 8
-  ret <4 x double> %v0
-
-; CHECK-LABEL: @test2l
-; CHECK: @llvm.ppc.qpx.qvstfd
-; CHECK: ret <4 x double>
-}
-
-define <4 x double> @test2ln(<4 x double>* align 16 %h, <4 x double> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
-  %hv = bitcast <4 x double>* %h1 to i8*
-  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
-
-  %v0 = load <4 x double>, <4 x double>* %h, align 8
-  ret <4 x double> %v0
-
-; CHECK-LABEL: @test2ln
-; CHECK: @llvm.ppc.qpx.qvstfd
-; CHECK: ret <4 x double>
-}
-
-define <4 x double> @test2la(<4 x double>* align 32 %h, <4 x double> %d) #0 {
-entry:
-  %h1 = getelementptr <4 x double>, <4 x double>* %h, i64 1
-  %hv = bitcast <4 x double>* %h1 to i8*
-  call void @llvm.ppc.qpx.qvstfd(<4 x double> %d, i8* %hv)
-
-  %v0 = load <4 x double>, <4 x double>* %h, align 8
-  ret <4 x double> %v0
-
-; CHECK-LABEL: @test2la
-; CHECK-NOT: @llvm.ppc.qpx.qvstfd
-; CHECK: ret <4 x double>
-}
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-

Removed: llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/amdgcn-intrinsics.ll (removed)
@@ -1,1540 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.rcp
-; --------------------------------------------------------------------
-
-declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
-declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_undef
-; CHECK-NEXT: ret float undef
-define float @test_constant_fold_rcp_f32_undef() nounwind {
-  %val = call float @llvm.amdgcn.rcp.f32(float undef) nounwind readnone
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_1
-; CHECK-NEXT: ret float 1.000000e+00
-define float @test_constant_fold_rcp_f32_1() nounwind {
-  %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_1
-; CHECK-NEXT:  ret double 1.000000e+00
-define double @test_constant_fold_rcp_f64_1() nounwind {
-  %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_half
-; CHECK-NEXT: ret float 2.000000e+00
-define float @test_constant_fold_rcp_f32_half() nounwind {
-  %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_half
-; CHECK-NEXT:  ret double 2.000000e+00
-define double @test_constant_fold_rcp_f64_half() nounwind {
-  %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_43
-; CHECK-NEXT: call float @llvm.amdgcn.rcp.f32(float 4.300000e+01)
-define float @test_constant_fold_rcp_f32_43() nounwind {
- %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
- ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_43
-; CHECK-NEXT: call double @llvm.amdgcn.rcp.f64(double 4.300000e+01)
-define double @test_constant_fold_rcp_f64_43() nounwind {
-  %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
-  ret double %val
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.rsq
-; --------------------------------------------------------------------
-
-declare float @llvm.amdgcn.rsq.f32(float) nounwind readnone
-
-; CHECK-LABEL: @test_constant_fold_rsq_f32_undef
-; CHECK-NEXT: ret float undef
-define float @test_constant_fold_rsq_f32_undef() nounwind {
-  %val = call float @llvm.amdgcn.rsq.f32(float undef) nounwind readnone
-  ret float %val
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.frexp.mant
-; --------------------------------------------------------------------
-
-declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
-declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
-
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
-; CHECK-NEXT: ret float undef
-define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float undef)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef(
-; CHECK-NEXT:  ret double undef
-define double @test_constant_fold_frexp_mant_f64_undef() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double undef)
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0(
-; CHECK-NEXT: ret float 0.000000e+00
-define float @test_constant_fold_frexp_mant_f32_0() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0(
-; CHECK-NEXT:  ret double 0.000000e+00
-define double @test_constant_fold_frexp_mant_f64_0() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0)
-  ret double %val
-}
-
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0(
-; CHECK-NEXT: ret float -0.000000e+00
-define float @test_constant_fold_frexp_mant_f32_n0() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0(
-; CHECK-NEXT:  ret double -0.000000e+00
-define double @test_constant_fold_frexp_mant_f64_n0() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0)
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1(
-; CHECK-NEXT: ret float 5.000000e-01
-define float @test_constant_fold_frexp_mant_f32_1() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1(
-; CHECK-NEXT:  ret double 5.000000e-01
-define double @test_constant_fold_frexp_mant_f64_1() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0)
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1(
-; CHECK-NEXT: ret float -5.000000e-01
-define float @test_constant_fold_frexp_mant_f32_n1() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1(
-; CHECK-NEXT:  ret double -5.000000e-01
-define double @test_constant_fold_frexp_mant_f64_n1() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0)
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan(
-; CHECK-NEXT: ret float 0x7FF8000000000000
-define float @test_constant_fold_frexp_mant_f32_nan() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan(
-; CHECK-NEXT:  ret double 0x7FF8000000000000
-define double @test_constant_fold_frexp_mant_f64_nan() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000)
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf(
-; CHECK-NEXT: ret float 0x7FF0000000000000
-define float @test_constant_fold_frexp_mant_f32_inf() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf(
-; CHECK-NEXT:  ret double 0x7FF0000000000000
-define double @test_constant_fold_frexp_mant_f64_inf() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000)
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf(
-; CHECK-NEXT: ret float 0xFFF0000000000000
-define float @test_constant_fold_frexp_mant_f32_ninf() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf(
-; CHECK-NEXT:  ret double 0xFFF0000000000000
-define double @test_constant_fold_frexp_mant_f64_ninf() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000)
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num(
-; CHECK-NEXT: ret float 0x3FEFFFFFE0000000
-define float @test_constant_fold_frexp_mant_f32_max_num() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num(
-; CHECK-NEXT:  ret double 0x3FEFFFFFFFFFFFFF
-define double @test_constant_fold_frexp_mant_f64_max_num() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF)
-  ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num(
-; CHECK-NEXT: ret float 5.000000e-01
-define float @test_constant_fold_frexp_mant_f32_min_num() nounwind {
-  %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000)
-  ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num(
-; CHECK-NEXT:  ret double 5.000000e-01
-define double @test_constant_fold_frexp_mant_f64_min_num() nounwind {
-  %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324)
-  ret double %val
-}
-
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.frexp.exp
-; --------------------------------------------------------------------
-
-declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
-declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
-; CHECK-NEXT: ret i32 undef
-define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef(
-; CHECK-NEXT:  ret i32 undef
-define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0(
-; CHECK-NEXT: ret i32 0
-define i32 @test_constant_fold_frexp_exp_f32_0() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0(
-; CHECK-NEXT:  ret i32 0
-define i32 @test_constant_fold_frexp_exp_f64_0() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0(
-; CHECK-NEXT: ret i32 0
-define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0(
-; CHECK-NEXT:  ret i32 0
-define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024(
-; CHECK-NEXT: ret i32 11
-define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024(
-; CHECK-NEXT:  ret i32 11
-define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024(
-; CHECK-NEXT: ret i32 11
-define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024(
-; CHECK-NEXT:  ret i32 11
-define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024(
-; CHECK-NEXT: ret i32 -9
-define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024(
-; CHECK-NEXT:  ret i32 -9
-define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan(
-; CHECK-NEXT: ret i32 0
-define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan(
-; CHECK-NEXT:  ret i32 0
-define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf(
-; CHECK-NEXT: ret i32 0
-define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf(
-; CHECK-NEXT:  ret i32 0
-define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf(
-; CHECK-NEXT: ret i32 0
-define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf(
-; CHECK-NEXT:  ret i32 0
-define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num(
-; CHECK-NEXT: ret i32 128
-define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num(
-; CHECK-NEXT:  ret i32 1024
-define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num(
-; CHECK-NEXT: ret i32 -148
-define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000)
-  ret i32 %val
-}
-
-; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num(
-; CHECK-NEXT:  ret i32 -1073
-define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind {
-  %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324)
-  ret i32 %val
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.class
-; --------------------------------------------------------------------
-
-declare i1 @llvm.amdgcn.class.f32(float, i32) nounwind readnone
-declare i1 @llvm.amdgcn.class.f64(double, i32) nounwind readnone
-
-; CHECK-LABEL: @test_class_undef_mask_f32(
-; CHECK: ret i1 false
-define i1 @test_class_undef_mask_f32(float %x) nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 undef)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_over_max_mask_f32(
-; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1)
-define i1 @test_class_over_max_mask_f32(float %x) nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1025)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_no_mask_f32(
-; CHECK: ret i1 false
-define i1 @test_class_no_mask_f32(float %x) nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 0)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_full_mask_f32(
-; CHECK: ret i1 true
-define i1 @test_class_full_mask_f32(float %x) nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 1023)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_undef_no_mask_f32(
-; CHECK: ret i1 false
-define i1 @test_class_undef_no_mask_f32() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 0)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_undef_full_mask_f32(
-; CHECK: ret i1 true
-define i1 @test_class_undef_full_mask_f32() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 1023)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_undef_val_f32(
-; CHECK: ret i1 undef
-define i1 @test_class_undef_val_f32() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 4)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_undef_undef_f32(
-; CHECK: ret i1 undef
-define i1 @test_class_undef_undef_f32() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float undef, i32 undef)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_var_mask_f32(
-; CHECK: %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
-define i1 @test_class_var_mask_f32(float %x, i32 %mask) nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 %mask)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_class_isnan_f32(
-; CHECK: %val = fcmp uno float %x, 0.000000e+00
-define i1 @test_class_isnan_f32(float %x) nounwind {
-  %val = call i1 @llvm.amdgcn.class.f32(float %x, i32 3)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_snan_test_snan_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_snan_test_snan_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 1)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_qnan_test_qnan_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_qnan_test_qnan_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 2)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_qnan_test_snan_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_qnan_test_snan_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 1)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_ninf_test_ninf_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_ninf_test_ninf_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 4)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_pinf_test_ninf_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_pinf_test_ninf_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 4)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_qnan_test_ninf_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_qnan_test_ninf_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 4)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_snan_test_ninf_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_snan_test_ninf_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 4)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_nnormal_test_nnormal_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_nnormal_test_nnormal_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 8)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_pnormal_test_nnormal_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_pnormal_test_nnormal_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 8)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_nsubnormal_test_nsubnormal_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_nsubnormal_test_nsubnormal_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 16)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_psubnormal_test_nsubnormal_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_psubnormal_test_nsubnormal_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 16)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_nzero_test_nzero_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_nzero_test_nzero_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 32)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_pzero_test_nzero_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_pzero_test_nzero_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 32)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_pzero_test_pzero_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_pzero_test_pzero_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0.0, i32 64)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_nzero_test_pzero_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_nzero_test_pzero_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double -0.0, i32 64)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_psubnormal_test_psubnormal_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_psubnormal_test_psubnormal_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x000fffffffffffff, i32 128)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_nsubnormal_test_psubnormal_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_nsubnormal_test_psubnormal_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x800fffffffffffff, i32 128)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_pnormal_test_pnormal_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_pnormal_test_pnormal_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 1.0, i32 256)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_nnormal_test_pnormal_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_nnormal_test_pnormal_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double -1.0, i32 256)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_pinf_test_pinf_f64(
-; CHECK: ret i1 true
-define i1 @test_constant_class_pinf_test_pinf_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000000, i32 512)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_ninf_test_pinf_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_ninf_test_pinf_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0xFFF0000000000000, i32 512)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_qnan_test_pinf_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_qnan_test_pinf_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF8000000000000, i32 512)
-  ret i1 %val
-}
-
-; CHECK-LABEL: @test_constant_class_snan_test_pinf_f64(
-; CHECK: ret i1 false
-define i1 @test_constant_class_snan_test_pinf_f64() nounwind {
-  %val = call i1 @llvm.amdgcn.class.f64(double 0x7FF0000000000001, i32 512)
-  ret i1 %val
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.cos
-; --------------------------------------------------------------------
-declare float @llvm.amdgcn.cos.f32(float) nounwind readnone
-declare float @llvm.fabs.f32(float) nounwind readnone
-
-; CHECK-LABEL: @cos_fneg_f32(
-; CHECK: %cos = call float @llvm.amdgcn.cos.f32(float %x)
-; CHECK-NEXT: ret float %cos
-define float @cos_fneg_f32(float %x) {
-  %x.fneg = fsub float -0.0, %x
-  %cos = call float @llvm.amdgcn.cos.f32(float %x.fneg)
-  ret float %cos
-}
-
-; CHECK-LABEL: @cos_fabs_f32(
-; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
-; CHECK-NEXT: ret float %cos
-define float @cos_fabs_f32(float %x) {
-  %x.fabs = call float @llvm.fabs.f32(float %x)
-  %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs)
-  ret float %cos
-}
-
-; CHECK-LABEL: @cos_fabs_fneg_f32(
-; CHECK-NEXT: %cos = call float @llvm.amdgcn.cos.f32(float %x)
-; CHECK-NEXT: ret float %cos
-define float @cos_fabs_fneg_f32(float %x) {
-  %x.fabs = call float @llvm.fabs.f32(float %x)
-  %x.fabs.fneg = fsub float -0.0, %x.fabs
-  %cos = call float @llvm.amdgcn.cos.f32(float %x.fabs.fneg)
-  ret float %cos
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.cvt.pkrtz
-; --------------------------------------------------------------------
-
-declare <2 x half> @llvm.amdgcn.cvt.pkrtz(float, float) nounwind readnone
-
-; CHECK-LABEL: @vars_lhs_cvt_pkrtz(
-; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
-define <2 x half> @vars_lhs_cvt_pkrtz(float %x, float %y) {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float %y)
-  ret <2 x half> %cvt
-}
-
-; CHECK-LABEL: @constant_lhs_cvt_pkrtz(
-; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.000000e+00, float %y)
-define <2 x half> @constant_lhs_cvt_pkrtz(float %y) {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float %y)
-  ret <2 x half> %cvt
-}
-
-; CHECK-LABEL: @constant_rhs_cvt_pkrtz(
-; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.000000e+00)
-define <2 x half> @constant_rhs_cvt_pkrtz(float %x) {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float 0.0)
-  ret <2 x half> %cvt
-}
-
-; CHECK-LABEL: @undef_lhs_cvt_pkrtz(
-; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
-define <2 x half> @undef_lhs_cvt_pkrtz(float %y) {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float %y)
-  ret <2 x half> %cvt
-}
-
-; CHECK-LABEL: @undef_rhs_cvt_pkrtz(
-; CHECK: %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
-define <2 x half> @undef_rhs_cvt_pkrtz(float %x) {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float %x, float undef)
-  ret <2 x half> %cvt
-}
-
-; CHECK-LABEL: @undef_cvt_pkrtz(
-; CHECK: ret <2 x half> undef
-define <2 x half> @undef_cvt_pkrtz() {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float undef, float undef)
-  ret <2 x half> %cvt
-}
-
-; CHECK-LABEL: @constant_splat0_cvt_pkrtz(
-; CHECK: ret <2 x half> zeroinitializer
-define <2 x half> @constant_splat0_cvt_pkrtz() {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 0.0, float 0.0)
-  ret <2 x half> %cvt
-}
-
-; CHECK-LABEL: @constant_cvt_pkrtz(
-; CHECK: ret <2 x half> <half 0xH4000, half 0xH4400>
-define <2 x half> @constant_cvt_pkrtz() {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 2.0, float 4.0)
-  ret <2 x half> %cvt
-}
-
-; Test constant values where rtz changes result
-; CHECK-LABEL: @constant_rtz_pkrtz(
-; CHECK: ret <2 x half> <half 0xH7BFF, half 0xH7BFF>
-define <2 x half> @constant_rtz_pkrtz() {
-  %cvt = call <2 x half> @llvm.amdgcn.cvt.pkrtz(float 65535.0, float 65535.0)
-  ret <2 x half> %cvt
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.ubfe
-; --------------------------------------------------------------------
-
-declare i32 @llvm.amdgcn.ubfe.i32(i32, i32, i32) nounwind readnone
-declare i64 @llvm.amdgcn.ubfe.i64(i64, i32, i32) nounwind readnone
-
-; CHECK-LABEL: @ubfe_var_i32(
-; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
-define i32 @ubfe_var_i32(i32 %src, i32 %offset, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_clear_high_bits_constant_offset_i32(
-; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 5, i32 %width)
-define i32 @ubfe_clear_high_bits_constant_offset_i32(i32 %src, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 133, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_clear_high_bits_constant_width_i32(
-; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 5)
-define i32 @ubfe_clear_high_bits_constant_width_i32(i32 %src, i32 %offset) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 133)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_width_0(
-; CHECK-NEXT: ret i32 0
-define i32 @ubfe_width_0(i32 %src, i32 %offset) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 0)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_width_31(
-; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
-define i32 @ubfe_width_31(i32 %src, i32 %offset) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 31)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_width_32(
-; CHECK-NEXT: ret i32 0
-define i32 @ubfe_width_32(i32 %src, i32 %offset) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 32)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_width_33(
-; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 1)
-define i32 @ubfe_width_33(i32 %src, i32 %offset) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 33)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_33(
-; CHECK-NEXT: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 1, i32 %width)
-define i32 @ubfe_offset_33(i32 %src, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 33, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_0(
-; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = shl i32 %src, %1
-; CHECK-NEXT: %bfe = lshr i32 %2, %1
-; CHECK-NEXT: ret i32 %bfe
-define i32 @ubfe_offset_0(i32 %src, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_32(
-; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = shl i32 %src, %1
-; CHECK-NEXT: %bfe = lshr i32 %2, %1
-; CHECK-NEXT: ret i32 %bfe
-define i32 @ubfe_offset_32(i32 %src, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_31(
-; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = shl i32 %src, %1
-; CHECK-NEXT: %bfe = lshr i32 %2, %1
-; CHECK-NEXT: ret i32 %bfe
-define i32 @ubfe_offset_31(i32 %src, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 32, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_0_width_0(
-; CHECK-NEXT: ret i32 0
-define i32 @ubfe_offset_0_width_0(i32 %src) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 0)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_0_width_3(
-; CHECK-NEXT: and i32 %src, 7
-; CHECK-NEXT: ret
-define i32 @ubfe_offset_0_width_3(i32 %src) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 0, i32 3)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_3_width_1(
-; CHECK-NEXT: %1 = lshr i32 %src, 3
-; CHECK-NEXT: and i32 %1, 1
-; CHECK-NEXT: ret i32
-define i32 @ubfe_offset_3_width_1(i32 %src) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 1)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_3_width_4(
-; CHECK-NEXT: %1 = lshr i32 %src, 3
-; CHECK-NEXT: and i32 %1, 15
-; CHECK-NEXT: ret i32
-define i32 @ubfe_offset_3_width_4(i32 %src) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 3, i32 4)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_0_0_0(
-; CHECK-NEXT: ret i32 0
-define i32 @ubfe_0_0_0() {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 0, i32 0, i32 0)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_neg1_5_7(
-; CHECK-NEXT: ret i32 127
-define i32 @ubfe_neg1_5_7() {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 -1, i32 5, i32 7)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_undef_src_i32(
-; CHECK-NEXT: ret i32 undef
-define i32 @ubfe_undef_src_i32(i32 %offset, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 undef, i32 %offset, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_undef_offset_i32(
-; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
-define i32 @ubfe_undef_offset_i32(i32 %src, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 undef, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_undef_width_i32(
-; CHECK: %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
-define i32 @ubfe_undef_width_i32(i32 %src, i32 %offset) {
-  %bfe = call i32 @llvm.amdgcn.ubfe.i32(i32 %src, i32 %offset, i32 undef)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_33_width_4_i64(
-; CHECK-NEXT: %1 = lshr i64 %src, 33
-; CHECK-NEXT: %bfe = and i64 %1, 15
-define i64 @ubfe_offset_33_width_4_i64(i64 %src) {
-  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 33, i32 4)
-  ret i64 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_0_i64(
-; CHECK-NEXT: %1 = sub i32 64, %width
-; CHECK-NEXT: %2 = zext i32 %1 to i64
-; CHECK-NEXT: %3 = shl i64 %src, %2
-; CHECK-NEXT: %bfe = lshr i64 %3, %2
-; CHECK-NEXT: ret i64 %bfe
-define i64 @ubfe_offset_0_i64(i64 %src, i32 %width) {
-  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 0, i32 %width)
-  ret i64 %bfe
-}
-
-; CHECK-LABEL: @ubfe_offset_32_width_32_i64(
-; CHECK-NEXT: %bfe = lshr i64 %src, 32
-; CHECK-NEXT: ret i64 %bfe
-define i64 @ubfe_offset_32_width_32_i64(i64 %src) {
-  %bfe = call i64 @llvm.amdgcn.ubfe.i64(i64 %src, i32 32, i32 32)
-  ret i64 %bfe
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.sbfe
-; --------------------------------------------------------------------
-
-declare i32 @llvm.amdgcn.sbfe.i32(i32, i32, i32) nounwind readnone
-declare i64 @llvm.amdgcn.sbfe.i64(i64, i32, i32) nounwind readnone
-
-; CHECK-LABEL: @sbfe_offset_31(
-; CHECK-NEXT: %1 = sub i32 32, %width
-; CHECK-NEXT: %2 = shl i32 %src, %1
-; CHECK-NEXT: %bfe = ashr i32 %2, %1
-; CHECK-NEXT: ret i32 %bfe
-define i32 @sbfe_offset_31(i32 %src, i32 %width) {
-  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 %src, i32 32, i32 %width)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @sbfe_neg1_5_7(
-; CHECK-NEXT: ret i32 -1
-define i32 @sbfe_neg1_5_7() {
-  %bfe = call i32 @llvm.amdgcn.sbfe.i32(i32 -1, i32 5, i32 7)
-  ret i32 %bfe
-}
-
-; CHECK-LABEL: @sbfe_offset_32_width_32_i64(
-; CHECK-NEXT: %bfe = ashr i64 %src, 32
-; CHECK-NEXT: ret i64 %bfe
-define i64 @sbfe_offset_32_width_32_i64(i64 %src) {
-  %bfe = call i64 @llvm.amdgcn.sbfe.i64(i64 %src, i32 32, i32 32)
-  ret i64 %bfe
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.exp
-; --------------------------------------------------------------------
-
-declare void @llvm.amdgcn.exp.f32(i32, i32, float, float, float, float, i1, i1) nounwind inaccessiblememonly
-
-; Make sure no crashing on invalid variable params
-; CHECK-LABEL: @exp_invalid_inputs(
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 true, i1 false)
-define void @exp_invalid_inputs(i32 %tgt, i32 %en) {
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 %en, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 %tgt, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  ret void
-}
-
-; CHECK-LABEL: @exp_disabled_inputs_to_undef(
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.000000e+00, float undef, float undef, float undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float 4.000000e+00, i1 true, i1 false)
-
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float undef, float undef, float undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float undef, float %y, float undef, float undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float undef, float undef, float %z, float undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float undef, float undef, float undef, float %w, i1 true, i1 false)
-
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float undef, float undef, float undef, float undef, i1 true, i1 false)
-
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.000000e+00, float 2.000000e+00, float undef, float undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.000000e+00, float undef, float 5.000000e-01, float undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.000000e+00, float undef, float undef, float 4.000000e+00, i1 false, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.000000e+00, float 2.000000e+00, float 5.000000e-01, float 4.000000e+00, i1 false, i1 false)
-define void @exp_disabled_inputs_to_undef(float %x, float %y, float %z, float %w) {
-  ; enable src0..src3 constants
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-
-  ; enable src0..src3 variables
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 1, float %x, float %y, float %z, float %w, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 2, float %x, float %y, float %z, float %w, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 4, float %x, float %y, float %z, float %w, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 8, float %x, float %y, float %z, float %w, i1 true, i1 false)
-
-  ; enable none
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 0, float %x, float %y, float %z, float %w, i1 true, i1 false)
-
-  ; enable different source combinations
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 3, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 5, float 1.0, float 2.0, float 0.5, float 4.0, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 9, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
-  call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float 1.0, float 2.0, float 0.5, float 4.0, i1 false, i1 false)
-
-  ret void
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.exp.compr
-; --------------------------------------------------------------------
-
-declare void @llvm.amdgcn.exp.compr.v2f16(i32, i32, <2 x half>, <2 x half>, i1, i1) nounwind inaccessiblememonly
-
-; CHECK-LABEL: @exp_compr_invalid_inputs(
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> <half 0xH3800, half 0xH4400>, i1 true, i1 false)
-define void @exp_compr_invalid_inputs(i32 %tgt, i32 %en) {
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 %en, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 %tgt, i32 5, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  ret void
-}
-
-; CHECK-LABEL: @exp_compr_disabled_inputs_to_undef(
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 0xH3C00, half 0xH4000>, <2 x half> undef, i1 true, i1 false)
-
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> undef, <2 x half> undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> undef, i1 true, i1 false)
-
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> undef, <2 x half> %zw, i1 true, i1 false)
-; CHECK: call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
-define void @exp_compr_disabled_inputs_to_undef(<2 x half> %xy, <2 x half> %zw) {
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> <half 1.0, half 2.0>, <2 x half> <half 0.5, half 4.0>, i1 true, i1 false)
-
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 0, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 1, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 2, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 3, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
-
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 12, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
-  call void @llvm.amdgcn.exp.compr.v2f16(i32 0, i32 15, <2 x half> %xy, <2 x half> %zw, i1 true, i1 false)
-  ret void
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.fmed3
-; --------------------------------------------------------------------
-
-declare float @llvm.amdgcn.fmed3.f32(float, float, float) nounwind readnone
-
-; CHECK-LABEL: @fmed3_f32(
-; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
-define float @fmed3_f32(float %x, float %y, float %z) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float %z)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_canonicalize_x_c0_c1_f32(
-; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
-define float @fmed3_canonicalize_x_c0_c1_f32(float %x) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0.0, float 1.0)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_canonicalize_c0_x_c1_f32(
-; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
-define float @fmed3_canonicalize_c0_x_c1_f32(float %x) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %x, float 1.0)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_canonicalize_c0_c1_x_f32(
-; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float 0.000000e+00, float 1.000000e+00)
-define float @fmed3_canonicalize_c0_c1_x_f32(float %x) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %x)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_canonicalize_x_y_c_f32(
-; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
-define float @fmed3_canonicalize_x_y_c_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.0)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_canonicalize_x_c_y_f32(
-; CHECK: %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
-define float @fmed3_canonicalize_x_c_y_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 1.0, float %y)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_canonicalize_c_x_y_f32(
-; CHECK: call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 1.000000e+00)
-define float @fmed3_canonicalize_c_x_y_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %x, float %y)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_undef_x_y_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
-define float @fmed3_undef_x_y_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_fmf_undef_x_y_f32(
-; CHECK: call nnan float @llvm.minnum.f32(float %x, float %y)
-define float @fmed3_fmf_undef_x_y_f32(float %x, float %y) {
-  %med3 = call nnan float @llvm.amdgcn.fmed3.f32(float undef, float %x, float %y)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_x_undef_y_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
-define float @fmed3_x_undef_y_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float undef, float %y)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_x_y_undef_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
-define float @fmed3_x_y_undef_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float undef)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_qnan0_x_y_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
-define float @fmed3_qnan0_x_y_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000000000000, float %x, float %y)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_x_qnan0_y_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
-define float @fmed3_x_qnan0_y_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8000000000000, float %y)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_x_y_qnan0_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
-define float @fmed3_x_y_qnan0_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float %y, float 0x7FF8000000000000)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_qnan1_x_y_f32(
-; CHECK: call float @llvm.minnum.f32(float %x, float %y)
-define float @fmed3_qnan1_x_y_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float %x, float %y)
-  ret float %med3
-}
-
-; This can return any of the qnans.
-; CHECK-LABEL: @fmed3_qnan0_qnan1_qnan2_f32(
-; CHECK: ret float 0x7FF8002000000000
-define float @fmed3_qnan0_qnan1_qnan2_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8000100000000, float 0x7FF8002000000000, float 0x7FF8030000000000)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_constant_src0_0_f32(
-; CHECK: ret float 5.000000e-01
-define float @fmed3_constant_src0_0_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float -1.0, float 4.0)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_constant_src0_1_f32(
-; CHECK: ret float 5.000000e-01
-define float @fmed3_constant_src0_1_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0.5, float 4.0, float -1.0)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_constant_src1_0_f32(
-; CHECK: ret float 5.000000e-01
-define float @fmed3_constant_src1_0_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 0.5, float 4.0)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_constant_src1_1_f32(
-; CHECK: ret float 5.000000e-01
-define float @fmed3_constant_src1_1_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float 0.5, float -1.0)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_constant_src2_0_f32(
-; CHECK: ret float 5.000000e-01
-define float @fmed3_constant_src2_0_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float -1.0, float 4.0, float 0.5)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_constant_src2_1_f32(
-; CHECK: ret float 5.000000e-01
-define float @fmed3_constant_src2_1_f32(float %x, float %y) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 4.0, float -1.0, float 0.5)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_x_qnan0_qnan1_f32(
-; CHECK: ret float %x
-define float @fmed3_x_qnan0_qnan1_f32(float %x) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float %x, float 0x7FF8001000000000, float 0x7FF8002000000000)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_qnan0_x_qnan1_f32(
-; CHECK: ret float %x
-define float @fmed3_qnan0_x_qnan1_f32(float %x) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float %x, float 0x7FF8002000000000)
-  ret float %med3
-}
-
-; CHECK-LABEL: @fmed3_qnan0_qnan1_x_f32(
-; CHECK: ret float %x
-define float @fmed3_qnan0_qnan1_x_f32(float %x) {
-  %med3 = call float @llvm.amdgcn.fmed3.f32(float 0x7FF8001000000000, float 0x7FF8002000000000, float %x)
-  ret float %med3
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.icmp
-; --------------------------------------------------------------------
-
-declare i64 @llvm.amdgcn.icmp.i32(i32, i32, i32) nounwind readnone convergent
-declare i64 @llvm.amdgcn.icmp.i64(i64, i64, i32) nounwind readnone convergent
-
-; Make sure there's no crash for invalid input
-; CHECK-LABEL: @invalid_nonconstant_icmp_code(
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
-define i64 @invalid_nonconstant_icmp_code(i32 %a, i32 %b, i32 %c) {
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 %c)
-  ret i64 %result
-}
-
-; CHECK-LABEL: @invalid_icmp_code(
-; CHECK: %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
-; CHECK: %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
-define i64 @invalid_icmp_code(i32 %a, i32 %b) {
-  %under = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 31)
-  %over = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 42)
-  %or = or i64 %under, %over
-  ret i64 %or
-}
-
-; CHECK-LABEL: @icmp_constant_inputs_false(
-; CHECK: ret i64 0
-define i64 @icmp_constant_inputs_false() {
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 32)
-  ret i64 %result
-}
-
-; CHECK-LABEL: @icmp_constant_inputs_true(
-; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5
-define i64 @icmp_constant_inputs_true() {
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 8, i32 34)
-  ret i64 %result
-}
-
-; CHECK-LABEL: @icmp_constant_to_rhs_slt(
-; CHECK: %result = call i64 @llvm.amdgcn.icmp.i32(i32 %x, i32 9, i32 38)
-define i64 @icmp_constant_to_rhs_slt(i32 %x) {
-  %result = call i64 @llvm.amdgcn.icmp.i32(i32 9, i32 %x, i32 40)
-  ret i64 %result
-}
-
-; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_eq_i32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
-define i64 @fold_icmp_ne_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
-  %cmp = icmp eq i32 %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ne_i32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
-define i64 @fold_icmp_ne_0_zext_icmp_ne_i32(i32 %a, i32 %b) {
-  %cmp = icmp ne i32 %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_sle_i32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 41)
-define i64 @fold_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
-  %cmp = icmp sle i32 %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ugt_i64(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
-define i64 @fold_icmp_ne_0_zext_icmp_ugt_i64(i64 %a, i64 %b) {
-  %cmp = icmp ugt i64 %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_ne_0_zext_icmp_ult_swap_i64(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i64(i64 %a, i64 %b, i32 34)
-define i64 @fold_icmp_ne_0_zext_icmp_ult_swap_i64(i64 %a, i64 %b) {
-  %cmp = icmp ugt i64 %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 0, i32 %zext.cmp, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_oeq_f32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 1)
-define i64 @fold_icmp_ne_0_zext_fcmp_oeq_f32(float %a, float %b) {
-  %cmp = fcmp oeq float %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_une_f32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
-define i64 @fold_icmp_ne_0_zext_fcmp_une_f32(float %a, float %b) {
-  %cmp = fcmp une float %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_ne_0_zext_fcmp_olt_f64(
-; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f64(double %a, double %b, i32 4)
-define i64 @fold_icmp_ne_0_zext_fcmp_olt_f64(double %a, double %b) {
-  %cmp = fcmp olt double %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_sext_icmp_ne_0_i32(
-; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
-define i64 @fold_icmp_sext_icmp_ne_0_i32(i32 %a, i32 %b) {
-  %cmp = icmp eq i32 %a, %b
-  %sext.cmp = sext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_eq_i32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 33)
-define i64 @fold_icmp_eq_0_zext_icmp_eq_i32(i32 %a, i32 %b) {
-  %cmp = icmp eq i32 %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_eq_0_zext_icmp_slt_i32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
-define i64 @fold_icmp_eq_0_zext_icmp_slt_i32(i32 %a, i32 %b) {
-  %cmp = icmp slt i32 %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_oeq_f32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 14)
-define i64 @fold_icmp_eq_0_zext_fcmp_oeq_f32(float %a, float %b) {
-  %cmp = fcmp oeq float %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ule_f32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 2)
-define i64 @fold_icmp_eq_0_zext_fcmp_ule_f32(float %a, float %b) {
-  %cmp = fcmp ule float %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_eq_0_zext_fcmp_ogt_f32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 13)
-define i64 @fold_icmp_eq_0_zext_fcmp_ogt_f32(float %a, float %b) {
-  %cmp = fcmp ogt float %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_zext_icmp_eq_1_i32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
-define i64 @fold_icmp_zext_icmp_eq_1_i32(i32 %a, i32 %b) {
-  %cmp = icmp eq i32 %a, %b
-  %zext.cmp = zext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 1, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_zext_argi1_eq_1_i32(
-; CHECK: %zext.cond = zext i1 %cond to i32
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 0, i32 33)
-define i64 @fold_icmp_zext_argi1_eq_1_i32(i1 %cond) {
-  %zext.cond = zext i1 %cond to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 1, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_zext_argi1_eq_neg1_i32(
-; CHECK: %zext.cond = zext i1 %cond to i32
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
-define i64 @fold_icmp_zext_argi1_eq_neg1_i32(i1 %cond) {
-  %zext.cond = zext i1 %cond to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cond, i32 -1, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_sext_argi1_eq_1_i32(
-; CHECK: %sext.cond = sext i1 %cond to i32
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
-define i64 @fold_icmp_sext_argi1_eq_1_i32(i1 %cond) {
-  %sext.cond = sext i1 %cond to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 1, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i32(
-; CHECK: %sext.cond = sext i1 %cond to i32
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 0, i32 33)
-define i64 @fold_icmp_sext_argi1_eq_neg1_i32(i1 %cond) {
-  %sext.cond = sext i1 %cond to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cond, i32 -1, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_sext_argi1_eq_neg1_i64(
-; CHECK: %sext.cond = sext i1 %cond to i64
-; CHECK: call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 0, i32 33)
-define i64 @fold_icmp_sext_argi1_eq_neg1_i64(i1 %cond) {
-  %sext.cond = sext i1 %cond to i64
-  %mask = call i64 @llvm.amdgcn.icmp.i64(i64 %sext.cond, i64 -1, i32 32)
-  ret i64 %mask
-}
-
-; TODO: Should be able to fold to false
-; CHECK-LABEL: @fold_icmp_sext_icmp_eq_1_i32(
-; CHECK: %cmp = icmp eq i32 %a, %b
-; CHECK: %sext.cmp = sext i1 %cmp to i32
-; CHECK: %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
-define i64 @fold_icmp_sext_icmp_eq_1_i32(i32 %a, i32 %b) {
-  %cmp = icmp eq i32 %a, %b
-  %sext.cmp = sext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 1, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_sext_icmp_eq_neg1_i32(
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 32)
-define i64 @fold_icmp_sext_icmp_eq_neg1_i32(i32 %a, i32 %b) {
-  %cmp = icmp eq i32 %a, %b
-  %sext.cmp = sext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_icmp_sext_icmp_sge_neg1_i32(
-; CHECK: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 39)
-define i64 @fold_icmp_sext_icmp_sge_neg1_i32(i32 %a, i32 %b) {
-  %cmp = icmp sge i32 %a, %b
-  %sext.cmp = sext i1 %cmp to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %sext.cmp, i32 -1, i32 32)
-  ret i64 %mask
-}
-
-; CHECK-LABEL: @fold_not_icmp_ne_0_zext_icmp_sle_i32(
-; CHECK-NEXT: call i64 @llvm.amdgcn.icmp.i32(i32 %a, i32 %b, i32 38)
-define i64 @fold_not_icmp_ne_0_zext_icmp_sle_i32(i32 %a, i32 %b) {
-  %cmp = icmp sle i32 %a, %b
-  %not = xor i1 %cmp, true
-  %zext.cmp = zext i1 %not to i32
-  %mask = call i64 @llvm.amdgcn.icmp.i32(i32 %zext.cmp, i32 0, i32 33)
-  ret i64 %mask
-}
-
-; --------------------------------------------------------------------
-; llvm.amdgcn.fcmp
-; --------------------------------------------------------------------
-
-declare i64 @llvm.amdgcn.fcmp.f32(float, float, i32) nounwind readnone convergent
-
-; Make sure there's no crash for invalid input
-; CHECK-LABEL: @invalid_nonconstant_fcmp_code(
-; CHECK: call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
-define i64 @invalid_nonconstant_fcmp_code(float %a, float %b, i32 %c) {
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 %c)
-  ret i64 %result
-}
-
-; CHECK-LABEL: @invalid_fcmp_code(
-; CHECK: %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
-; CHECK: %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
-define i64 @invalid_fcmp_code(float %a, float %b) {
-  %under = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 -1)
-  %over = call i64 @llvm.amdgcn.fcmp.f32(float %a, float %b, i32 16)
-  %or = or i64 %under, %over
-  ret i64 %or
-}
-
-; CHECK-LABEL: @fcmp_constant_inputs_false(
-; CHECK: ret i64 0
-define i64 @fcmp_constant_inputs_false() {
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 1)
-  ret i64 %result
-}
-
-; CHECK-LABEL: @fcmp_constant_inputs_true(
-; CHECK: %result = call i64 @llvm.read_register.i64(metadata !0) #5
-define i64 @fcmp_constant_inputs_true() {
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float 2.0, float 4.0, i32 4)
-  ret i64 %result
-}
-
-; CHECK-LABEL: @fcmp_constant_to_rhs_olt(
-; CHECK: %result = call i64 @llvm.amdgcn.fcmp.f32(float %x, float 4.000000e+00, i32 2)
-define i64 @fcmp_constant_to_rhs_olt(float %x) {
-  %result = call i64 @llvm.amdgcn.fcmp.f32(float 4.0, float %x, i32 4)
-  ret i64 %result
-}
-
-; CHECK: attributes #5 = { convergent }

Removed: llvm/trunk/test/Transforms/InstCombine/blend_x86.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/blend_x86.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/blend_x86.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/blend_x86.ll (removed)
@@ -1,151 +0,0 @@
-; RUN: opt < %s -instcombine -mtriple=x86_64-apple-macosx -mcpu=core-avx2 -S | FileCheck %s
-
-define <2 x double> @constant_blendvpd(<2 x double> %xy, <2 x double> %ab) {
-; CHECK-LABEL: @constant_blendvpd(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %ab, <2 x double> %xy, <2 x i32> <i32 0, i32 3>
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00>)
-  ret <2 x double> %1
-}
-
-define <2 x double> @constant_blendvpd_zero(<2 x double> %xy, <2 x double> %ab) {
-; CHECK-LABEL: @constant_blendvpd_zero
-; CHECK-NEXT: ret <2 x double> %xy
-  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %ab, <2 x double> zeroinitializer)
-  ret <2 x double> %1
-}
-
-define <2 x double> @constant_blendvpd_dup(<2 x double> %xy, <2 x double> %sel) {
-; CHECK-LABEL: @constant_blendvpd_dup
-; CHECK-NEXT: ret <2 x double> %xy
-  %1 = tail call <2 x double> @llvm.x86.sse41.blendvpd(<2 x double> %xy, <2 x double> %xy, <2 x double> %sel)
-  ret <2 x double> %1
-}
-
-define <4 x float> @constant_blendvps(<4 x float> %xyzw, <4 x float> %abcd) {
-; CHECK-LABEL: @constant_blendvps(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %abcd, <4 x float> %xyzw, <4 x i32> <i32 4, i32 5, i32 6, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
-  ret <4 x float> %1
-}
-
-define <4 x float> @constant_blendvps_zero(<4 x float> %xyzw, <4 x float> %abcd) {
-; CHECK-LABEL: @constant_blendvps_zero
-; CHECK-NEXT: ret <4 x float> %xyzw
-  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %abcd, <4 x float> zeroinitializer)
-  ret <4 x float> %1
-}
-
-define <4 x float> @constant_blendvps_dup(<4 x float> %xyzw, <4 x float> %sel) {
-; CHECK-LABEL: @constant_blendvps_dup
-; CHECK-NEXT: ret <4 x float> %xyzw
-  %1 = tail call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %xyzw, <4 x float> %xyzw, <4 x float> %sel)
-  ret <4 x float> %1
-}
-
-define <16 x i8> @constant_pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd) {
-; CHECK-LABEL: @constant_pblendvb(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %abcd, <16 x i8> %xyzw, <16 x i32> <i32 16, i32 17, i32 2, i32 19, i32 4, i32 5, i32 6, i32 23, i32 24, i32 25, i32 10, i32 27, i32 12, i32 13, i32 14, i32 31>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0, i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @constant_pblendvb_zero(<16 x i8> %xyzw, <16 x i8> %abcd) {
-; CHECK-LABEL: @constant_pblendvb_zero
-; CHECK-NEXT: ret <16 x i8> %xyzw
-  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %abcd, <16 x i8> zeroinitializer)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @constant_pblendvb_dup(<16 x i8> %xyzw, <16 x i8> %sel) {
-; CHECK-LABEL: @constant_pblendvb_dup
-; CHECK-NEXT: ret <16 x i8> %xyzw
-  %1 = tail call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %xyzw, <16 x i8> %xyzw, <16 x i8> %sel)
-  ret <16 x i8> %1
-}
-
-define <4 x double> @constant_blendvpd_avx(<4 x double> %xy, <4 x double> %ab) {
-; CHECK-LABEL: @constant_blendvpd_avx(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %ab, <4 x double> %xy, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> <double 0xFFFFFFFFE0000000, double 0.000000e+00, double 0xFFFFFFFFE0000000, double 0.000000e+00>)
-  ret <4 x double> %1
-}
-
-define <4 x double> @constant_blendvpd_avx_zero(<4 x double> %xy, <4 x double> %ab) {
-; CHECK-LABEL: @constant_blendvpd_avx_zero
-; CHECK-NEXT: ret <4 x double> %xy
-  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %ab, <4 x double> zeroinitializer)
-  ret <4 x double> %1
-}
-
-define <4 x double> @constant_blendvpd_avx_dup(<4 x double> %xy, <4 x double> %sel) {
-; CHECK-LABEL: @constant_blendvpd_avx_dup
-; CHECK-NEXT: ret <4 x double> %xy
-  %1 = tail call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %xy, <4 x double> %xy, <4 x double> %sel)
-  ret <4 x double> %1
-}
-
-define <8 x float> @constant_blendvps_avx(<8 x float> %xyzw, <8 x float> %abcd) {
-; CHECK-LABEL: @constant_blendvps_avx(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %abcd, <8 x float> %xyzw, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 12, i32 13, i32 14, i32 7>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0xFFFFFFFFE0000000>)
-  ret <8 x float> %1
-}
-
-define <8 x float> @constant_blendvps_avx_zero(<8 x float> %xyzw, <8 x float> %abcd) {
-; CHECK-LABEL: @constant_blendvps_avx_zero
-; CHECK-NEXT: ret <8 x float> %xyzw
-  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %abcd, <8 x float> zeroinitializer)
-  ret <8 x float> %1
-}
-
-define <8 x float> @constant_blendvps_avx_dup(<8 x float> %xyzw, <8 x float> %sel) {
-; CHECK-LABEL: @constant_blendvps_avx_dup
-; CHECK-NEXT: ret <8 x float> %xyzw
-  %1 = tail call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %xyzw, <8 x float> %xyzw, <8 x float> %sel)
-  ret <8 x float> %1
-}
-
-define <32 x i8> @constant_pblendvb_avx2(<32 x i8> %xyzw, <32 x i8> %abcd) {
-; CHECK-LABEL: @constant_pblendvb_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %abcd, <32 x i8> %xyzw, <32 x i32> <i32 32, i32 33, i32 2, i32 35, i32 4, i32 5, i32 6, i32 39, i32 40, i32 41, i32 10, i32 43, i32 12, i32 13, i32 14, i32 47, i32 48, i32 49, i32 18, i32 51, i32 20, i32 21, i32 22, i32 55, i32 56, i32 57, i32 26, i32 59, i32 28, i32 29, i32 30, i32 63>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd,
-        <32 x i8> <i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
-                   i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
-                   i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0,
-                   i8 0, i8 0, i8 255, i8 0, i8 255, i8 255, i8 255, i8 0>)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @constant_pblendvb_avx2_zero(<32 x i8> %xyzw, <32 x i8> %abcd) {
-; CHECK-LABEL: @constant_pblendvb_avx2_zero
-; CHECK-NEXT: ret <32 x i8> %xyzw
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %abcd, <32 x i8> zeroinitializer)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @constant_pblendvb_avx2_dup(<32 x i8> %xyzw, <32 x i8> %sel) {
-; CHECK-LABEL: @constant_pblendvb_avx2_dup
-; CHECK-NEXT: ret <32 x i8> %xyzw
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8> %xyzw, <32 x i8> %xyzw, <32 x i8> %sel)
-  ret <32 x i8> %1
-}
-
-declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>)
-declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, <4 x float>)
-declare <2 x double> @llvm.x86.sse41.blendvpd(<2 x double>, <2 x double>, <2 x double>)
-
-declare <32 x i8> @llvm.x86.avx2.pblendvb(<32 x i8>, <32 x i8>, <32 x i8>)
-declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, <8 x float>)
-declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, <4 x double>)

Removed: llvm/trunk/test/Transforms/InstCombine/constant-fold-hang.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/constant-fold-hang.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/constant-fold-hang.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/constant-fold-hang.ll (removed)
@@ -1,14 +0,0 @@
-; RUN: opt -instcombine < %s
-
-; Function Attrs: nounwind readnone ssp
-define void @mulByZero(<4 x i16> %x) #0 {
-entry:
-  %a = tail call <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16> %x, <4 x i16> zeroinitializer) #2
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare <4 x i32> @llvm.arm.neon.vmulls.v4i32(<4 x i16>, <4 x i16>) #1
-
-attributes #0 = { nounwind readnone ssp }
-attributes #1 = { nounwind readnone }

Removed: llvm/trunk/test/Transforms/InstCombine/neon-intrinsics.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/neon-intrinsics.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/neon-intrinsics.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/neon-intrinsics.ll (removed)
@@ -1,25 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; The alignment arguments for NEON load/store intrinsics can be increased
-; by instcombine.  Check for this.
-
-; CHECK: vld4.v2i32.p0i8({{.*}}, i32 32)
-; CHECK: vst4.p0i8.v2i32({{.*}}, i32 16)
-
- at x = common global [8 x i32] zeroinitializer, align 32
- at y = common global [8 x i32] zeroinitializer, align 16
-
-%struct.__neon_int32x2x4_t = type { <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32> }
-
-define void @test() nounwind ssp {
-  %tmp1 = call %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8* bitcast ([8 x i32]* @x to i8*), i32 1)
-  %tmp2 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 0
-  %tmp3 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 1
-  %tmp4 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 2
-  %tmp5 = extractvalue %struct.__neon_int32x2x4_t %tmp1, 3
-  call void @llvm.arm.neon.vst4.p0i8.v2i32(i8* bitcast ([8 x i32]* @y to i8*), <2 x i32> %tmp2, <2 x i32> %tmp3, <2 x i32> %tmp4, <2 x i32> %tmp5, i32 1)
-  ret void
-}
-
-declare %struct.__neon_int32x2x4_t @llvm.arm.neon.vld4.v2i32.p0i8(i8*, i32) nounwind readonly
-declare void @llvm.arm.neon.vst4.p0i8.v2i32(i8*, <2 x i32>, <2 x i32>, <2 x i32>, <2 x i32>, i32) nounwind

Removed: llvm/trunk/test/Transforms/InstCombine/pr2645-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/pr2645-1.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/pr2645-1.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/pr2645-1.ll (removed)
@@ -1,39 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep shufflevector
-; PR2645
-
-; instcombine shouldn't delete the shufflevector.
-
-define internal void @""(i8*, i32, i8*) {
-; <label>:3
-        br label %4
-
-; <label>:4             ; preds = %6, %3
-        %.0 = phi i32 [ 0, %3 ], [ %19, %6 ]            ; <i32> [#uses=4]
-        %5 = icmp slt i32 %.0, %1               ; <i1> [#uses=1]
-        br i1 %5, label %6, label %20
-
-; <label>:6             ; preds = %4
-        %7 = getelementptr i8, i8* %2, i32 %.0              ; <i8*> [#uses=1]
-        %8 = bitcast i8* %7 to <4 x i16>*               ; <<4 x i16>*> [#uses=1]
-        %9 = load <4 x i16>, <4 x i16>* %8, align 1                ; <<4 x i16>> [#uses=1]
-        %10 = bitcast <4 x i16> %9 to <1 x i64>         ; <<1 x i64>> [#uses=1]
-        %11 = call <2 x i64> @foo(<1 x i64> %10)
-; <<2 x i64>> [#uses=1]
-        %12 = bitcast <2 x i64> %11 to <4 x i32>                ; <<4 x i32>> [#uses=1]
-        %13 = bitcast <4 x i32> %12 to <8 x i16>                ; <<8 x i16>> [#uses=2]
-        %14 = shufflevector <8 x i16> %13, <8 x i16> %13, <8 x i32> < i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3 >          ; <<8 x i16>> [#uses=1]
-        %15 = bitcast <8 x i16> %14 to <4 x i32>                ; <<4 x i32>> [#uses=1]
-        %16 = sitofp <4 x i32> %15 to <4 x float>               ; <<4 x float>> [#uses=1]
-        %17 = getelementptr i8, i8* %0, i32 %.0             ; <i8*> [#uses=1]
-        %18 = bitcast i8* %17 to <4 x float>*           ; <<4 x float>*> [#uses=1]
-        store <4 x float> %16, <4 x float>* %18, align 1
-        %19 = add i32 %.0, 1            ; <i32> [#uses=1]
-        br label %4
-
-; <label>:20            ; preds = %4
-        call void @llvm.x86.mmx.emms( )
-        ret void
-}
-
-declare <2 x i64> @foo(<1 x i64>)
-declare void @llvm.x86.mmx.emms( )

Removed: llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/shufflemask-undef.ll (removed)
@@ -1,109 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep "shufflevector.*i32 8"
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9"
-	%struct.ActiveTextureTargets = type { i64, i64, i64, i64, i64, i64 }
-	%struct.AlphaTest = type { float, i16, i8, i8 }
-	%struct.ArrayRange = type { i8, i8, i8, i8 }
-	%struct.BlendMode = type { i16, i16, i16, i16, %struct.IColor4, i16, i16, i8, i8, i8, i8 }
-	%struct.ClearColor = type { double, %struct.IColor4, %struct.IColor4, float, i32 }
-	%struct.ClipPlane = type { i32, [6 x %struct.IColor4] }
-	%struct.ColorBuffer = type { i16, i8, i8, [8 x i16], [0 x i32] }
-	%struct.ColorMatrix = type { [16 x float]*, %struct.ImagingColorScale }
-	%struct.Convolution = type { %struct.IColor4, %struct.ImagingColorScale, i16, i16, [0 x i32], float*, i32, i32 }
-	%struct.DepthTest = type { i16, i16, i8, i8, i8, i8, double, double }
-	%struct.FixedFunction = type { %struct.PPStreamToken* }
-	%struct.FogMode = type { %struct.IColor4, float, float, float, float, float, i16, i16, i16, i8, i8 }
-	%struct.HintMode = type { i16, i16, i16, i16, i16, i16, i16, i16, i16, i16 }
-	%struct.Histogram = type { %struct.ProgramLimits*, i32, i16, i8, i8 }
-	%struct.ImagingColorScale = type { %struct.TCoord2, %struct.TCoord2, %struct.TCoord2, %struct.TCoord2 }
-	%struct.ImagingSubset = type { %struct.Convolution, %struct.Convolution, %struct.Convolution, %struct.ColorMatrix, %struct.Minmax, %struct.Histogram, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, %struct.ImagingColorScale, i32, [0 x i32] }
-	%struct.Light = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.PointLineLimits, float, float, float, float, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, %struct.PointLineLimits, float, float, float, float, float }
-	%struct.LightModel = type { %struct.IColor4, [8 x %struct.Light], [2 x %struct.Material], i32, i16, i16, i16, i8, i8, i8, i8, i8, i8 }
-	%struct.LightProduct = type { %struct.IColor4, %struct.IColor4, %struct.IColor4 }
-	%struct.LineMode = type { float, i32, i16, i16, i8, i8, i8, i8 }
-	%struct.LogicOp = type { i16, i8, i8 }
-	%struct.MaskMode = type { i32, [3 x i32], i8, i8, i8, i8, i8, i8, i8, i8 }
-	%struct.Material = type { %struct.IColor4, %struct.IColor4, %struct.IColor4, %struct.IColor4, float, float, float, float, [8 x %struct.LightProduct], %struct.IColor4, [8 x i32] }
-	%struct.Minmax = type { %struct.MinmaxTable*, i16, i8, i8, [0 x i32] }
-	%struct.MinmaxTable = type { %struct.IColor4, %struct.IColor4 }
-	%struct.Mipmaplevel = type { [4 x i32], [4 x i32], [4 x float], [4 x i32], i32, i32, float*, i8*, i16, i16, i16, i16, [2 x float] }
-	%struct.Multisample = type { float, i8, i8, i8, i8, i8, i8, i8, i8 }
-	%struct.PipelineProgramState = type { i8, i8, i8, i8, [0 x i32], %struct.IColor4* }
-	%struct.PixelMap = type { i32*, float*, float*, float*, float*, float*, float*, float*, float*, i32*, i32, i32, i32, i32, i32, i32, i32, i32, i32, i32 }
-	%struct.PixelMode = type { float, float, %struct.PixelStore, %struct.PixelTransfer, %struct.PixelMap, %struct.ImagingSubset, i32, i32 }
-	%struct.PixelPack = type { i32, i32, i32, i32, i32, i32, i32, i32, i8, i8, i8, i8 }
-	%struct.PixelStore = type { %struct.PixelPack, %struct.PixelPack }
-	%struct.PixelTransfer = type { float, float, float, float, float, float, float, float, float, float, i32, i32, float, float, float, float, float, float, float, float, float, float, float, float }
-	%struct.PluginBufferData = type { i32 }
-	%struct.PointLineLimits = type { float, float, float }
-	%struct.PointMode = type { float, float, float, float, %struct.PointLineLimits, float, i8, i8, i8, i8, i16, i16, i32, i16, i16 }
-	%struct.PolygonMode = type { [128 x i8], float, float, i16, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8 }
-	%struct.ProgramLimits = type { i32, i32, i32, i32 }
-	%struct.RegisterCombiners = type { i8, i8, i8, i8, i32, [2 x %struct.IColor4], [8 x %struct.RegisterCombinersPerStageState], %struct.RegisterCombinersFinalStageState }
-	%struct.RegisterCombinersFinalStageState = type { i8, i8, i8, i8, [7 x %struct.RegisterCombinersPerVariableState] }
-	%struct.RegisterCombinersPerPortionState = type { [4 x %struct.RegisterCombinersPerVariableState], i8, i8, i8, i8, i16, i16, i16, i16, i16, i16 }
-	%struct.RegisterCombinersPerStageState = type { [2 x %struct.RegisterCombinersPerPortionState], [2 x %struct.IColor4] }
-	%struct.RegisterCombinersPerVariableState = type { i16, i16, i16, i16 }
-	%struct.SWRSurfaceRec = type { i32, i32, i32, i32, i32, i32, i32, i32, i32, i32, i8*, i8*, i8*, [4 x i8*], i32 }
-	%struct.ScissorTest = type { %struct.ProgramLimits, i8, i8, i8, i8 }
-	%struct.State = type <{ i16, i16, i16, i16, i32, i32, [256 x %struct.IColor4], [128 x %struct.IColor4], %struct.Viewport, %struct.Transform, %struct.LightModel, %struct.ActiveTextureTargets, %struct.AlphaTest, %struct.BlendMode, %struct.ClearColor, %struct.ColorBuffer, %struct.DepthTest, %struct.ArrayRange, %struct.FogMode, %struct.HintMode, %struct.LineMode, %struct.LogicOp, %struct.MaskMode, %struct.PixelMode, %struct.PointMode, %struct.PolygonMode, %struct.ScissorTest, i32, %struct.StencilTest, [8 x %struct.TextureMode], [16 x %struct.TextureImageMode], %struct.ArrayRange, [8 x %struct.TextureCoordGen], %struct.ClipPlane, %struct.Multisample, %struct.RegisterCombiners, %struct.ArrayRange, %struct.ArrayRange, [3 x %struct.PipelineProgramState], %struct.ArrayRange, %struct.TransformFeedback, i32*, %struct.FixedFunction, [3 x i32], [3 x i32] }>
-	%struct.StencilTest = type { [3 x { i32, i32, i16, i16, i16, i16 }], i32, [4 x i8] }
-	%struct.TextureCoordGen = type { { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, { i16, i16, %struct.IColor4, %struct.IColor4 }, i8, i8, i8, i8 }
-	%struct.TextureGeomState = type { i16, i16, i16, i16, i16, i8, i8, i8, i8, i16, i16, i16, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, [6 x i16], [6 x i16] }
-	%struct.TextureImageMode = type { float }
-	%struct.TextureLevel = type { i32, i32, i16, i16, i16, i8, i8, i16, i16, i16, i16, i8* }
-	%struct.TextureMode = type { %struct.IColor4, i32, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, i16, float, float, i16, i16, i16, i16, i16, i16, [4 x i16], i8, i8, i8, i8, [3 x float], [4 x float], float, float }
-	%struct.TextureParamState = type { i16, i16, i16, i16, i16, i16, %struct.IColor4, float, float, float, float, i16, i16, i16, i16, float, i16, i8, i8, i32, i8* }
-	%struct.TextureRec = type { [4 x float], %struct.TextureState*, %struct.Mipmaplevel*, %struct.Mipmaplevel*, float, float, float, float, i8, i8, i8, i8, i16, i16, i16, i16, i32, float, [2 x %struct.PPStreamToken] }
-	%struct.TextureState = type { i16, i8, i8, i16, i16, float, i32, %struct.SWRSurfaceRec*, %struct.TextureParamState, %struct.TextureGeomState, [0 x i32], i8*, i32, %struct.TextureLevel, [1 x [15 x %struct.TextureLevel]] }
-	%struct.Transform = type <{ [24 x [16 x float]], [24 x [16 x float]], [16 x float], float, float, float, float, float, i8, i8, i8, i8, i32, i32, i32, i16, i16, i8, i8, i8, i8, i32 }>
-	%struct.TransformFeedback = type { i8, i8, i8, i8, [0 x i32], [16 x i32], [16 x i32] }
-	%struct.Viewport = type { float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, float, double, double, i32, i32, i32, i32, float, float, float, float }
-	%struct.IColor4 = type { float, float, float, float }
-	%struct.TCoord2 = type { float, float }
-	%struct.VMGPStack = type { [6 x <4 x float>*], <4 x float>*, i32, i32, <4 x float>*, <4 x float>**, i32, i32, i32, i32, i32, i32 }
-	%struct.VMTextures = type { [16 x %struct.TextureRec*] }
-	%struct.PPStreamToken = type { { i16, i16, i32 } }
-	%struct._VMConstants = type { <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>, float, float, float, float, float, float, float, float, float, float, float, float, [256 x float], [528 x i8], { void (i8*, i8*, i32, i8*)*, float (float)*, float (float)*, float (float)*, i32 (float)* } }
-
-define i32 @foo(%struct.State* %dst, <4 x float>* %prgrm, <4 x float>** %buffs, %struct._VMConstants* %cnstn, %struct.PPStreamToken* %pstrm, %struct.PluginBufferData* %gpctx, %struct.VMTextures* %txtrs, %struct.VMGPStack* %gpstk) nounwind {
-bb266.i:
-	getelementptr <4 x float>, <4 x float>* null, i32 11		; <<4 x float>*>:0 [#uses=1]
-	load <4 x float>, <4 x float>* %0, align 16		; <<4 x float>>:1 [#uses=1]
-	shufflevector <4 x float> %1, <4 x float> undef, <4 x i32> < i32 0, i32 1, i32 1, i32 1 >		; <<4 x float>>:2 [#uses=1]
-	shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>>:3 [#uses=1]
-	shufflevector <4 x float> undef, <4 x float> undef, <4 x i32> < i32 0, i32 4, i32 1, i32 5 >		; <<4 x float>>:4 [#uses=1]
-	shufflevector <4 x float> %4, <4 x float> %3, <4 x i32> < i32 6, i32 7, i32 2, i32 3 >		; <<4 x float>>:5 [#uses=1]
-	fmul <4 x float> %5, zeroinitializer		; <<4 x float>>:6 [#uses=2]
-	fmul <4 x float> %6, %6		; <<4 x float>>:7 [#uses=1]
-	fadd <4 x float> zeroinitializer, %7		; <<4 x float>>:8 [#uses=1]
-	call <4 x float> @llvm.x86.sse.max.ps( <4 x float> zeroinitializer, <4 x float> %8 ) nounwind readnone		; <<4 x float>>:9 [#uses=1]
-	%phitmp40 = bitcast <4 x float> %9 to <4 x i32>		; <<4 x i32>> [#uses=1]
-	%tmp4109.i = and <4 x i32> %phitmp40, < i32 8388607, i32 8388607, i32 8388607, i32 8388607 >		; <<4 x i32>> [#uses=1]
-	%tmp4116.i = or <4 x i32> %tmp4109.i, < i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216 >		; <<4 x i32>> [#uses=1]
-	%tmp4117.i = bitcast <4 x i32> %tmp4116.i to <4 x float>		; <<4 x float>> [#uses=1]
-	fadd <4 x float> %tmp4117.i, zeroinitializer		; <<4 x float>>:10 [#uses=1]
-	fmul <4 x float> %10, < float 5.000000e-01, float 5.000000e-01, float 5.000000e-01, float 5.000000e-01 >		; <<4 x float>>:11 [#uses=1]
-	call <4 x float> @llvm.x86.sse.max.ps( <4 x float> %11, <4 x float> zeroinitializer ) nounwind readnone		; <<4 x float>>:12 [#uses=1]
-	call <4 x float> @llvm.x86.sse.min.ps( <4 x float> %12, <4 x float> zeroinitializer ) nounwind readnone		; <<4 x float>>:13 [#uses=1]
-	%tmp4170.i = call <4 x float> @llvm.x86.sse.cmp.ps( <4 x float> %13, <4 x float> zeroinitializer, i8 2 ) nounwind		; <<4 x float>> [#uses=1]
-	bitcast <4 x float> %tmp4170.i to <16 x i8>		; <<16 x i8>>:14 [#uses=1]
-	call i32 @llvm.x86.sse2.pmovmskb.128( <16 x i8> %14 ) nounwind readnone		; <i32>:15 [#uses=1]
-	icmp eq i32 %15, 0		; <i1>:16 [#uses=1]
-	br i1 %16, label %bb5574.i, label %bb4521.i
-
-bb4521.i:		; preds = %bb266.i
-	unreachable
-
-bb5574.i:		; preds = %bb266.i
-	unreachable
-}
-
-declare <4 x float> @llvm.x86.sse.cmp.ps(<4 x float>, <4 x float>, i8) nounwind readnone
-
-declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>) nounwind readnone
-
-declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
-
-declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone

Modified: llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll?rev=302979&r1=302978&r2=302979&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vec_demanded_elts.ll Sat May 13 00:39:46 2017
@@ -2,30 +2,6 @@
 ; RUN: opt < %s -instcombine -S | FileCheck %s
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 
-define i16 @test1(float %f) {
-; CHECK-LABEL: @test1(
-; CHECK-NEXT:    [[TMP281:%.*]] = fadd float %f, -1.000000e+00
-; CHECK-NEXT:    [[TMP373:%.*]] = fmul float [[TMP281]], 5.000000e-01
-; CHECK-NEXT:    [[TMP374:%.*]] = insertelement <4 x float> undef, float [[TMP373]], i32 0
-; CHECK-NEXT:    [[TMP48:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP374]], <4 x float> <float 6.553500e+04, float undef, float undef, float undef>)
-; CHECK-NEXT:    [[TMP59:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP48]], <4 x float> <float 0.000000e+00, float undef, float undef, float undef>)
-; CHECK-NEXT:    [[TMP_UPGRD_1:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[TMP59]])
-; CHECK-NEXT:    [[TMP69:%.*]] = trunc i32 [[TMP_UPGRD_1]] to i16
-; CHECK-NEXT:    ret i16 [[TMP69]]
-;
-  %tmp = insertelement <4 x float> undef, float %f, i32 0
-  %tmp10 = insertelement <4 x float> %tmp, float 0.000000e+00, i32 1
-  %tmp11 = insertelement <4 x float> %tmp10, float 0.000000e+00, i32 2
-  %tmp12 = insertelement <4 x float> %tmp11, float 0.000000e+00, i32 3
-  %tmp28 = tail call <4 x float> @llvm.x86.sse.sub.ss( <4 x float> %tmp12, <4 x float> < float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
-  %tmp37 = tail call <4 x float> @llvm.x86.sse.mul.ss( <4 x float> %tmp28, <4 x float> < float 5.000000e-01, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
-  %tmp48 = tail call <4 x float> @llvm.x86.sse.min.ss( <4 x float> %tmp37, <4 x float> < float 6.553500e+04, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00 > )
-  %tmp59 = tail call <4 x float> @llvm.x86.sse.max.ss( <4 x float> %tmp48, <4 x float> zeroinitializer )
-  %tmp.upgrd.1 = tail call i32 @llvm.x86.sse.cvttss2si( <4 x float> %tmp59 )
-  %tmp69 = trunc i32 %tmp.upgrd.1 to i16
-  ret i16 %tmp69
-}
-
 define i32 @test2(float %f) {
 ; CHECK-LABEL: @test2(
 ; CHECK-NEXT:    [[TMP5:%.*]] = fmul float %f, %f
@@ -42,77 +18,6 @@ define i32 @test2(float %f) {
   ret i32 %tmp21
 }
 
-define i64 @test3(float %f, double %d) {
-; CHECK-LABEL: @test3(
-; CHECK-NEXT:    [[V00:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> [[V00]])
-; CHECK-NEXT:    [[V10:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> [[V10]])
-; CHECK-NEXT:    [[V20:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> [[V20]])
-; CHECK-NEXT:    [[V30:%.*]] = insertelement <4 x float> undef, float %f, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> [[V30]])
-; CHECK-NEXT:    [[V40:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> [[V40]])
-; CHECK-NEXT:    [[V50:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> [[V50]])
-; CHECK-NEXT:    [[V60:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> [[V60]])
-; CHECK-NEXT:    [[V70:%.*]] = insertelement <2 x double> undef, double %d, i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> [[V70]])
-; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
-; CHECK-NEXT:    ret i64 [[TMP15]]
-;
-  %v00 = insertelement <4 x float> undef, float %f, i32 0
-  %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
-  %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
-  %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
-  %tmp0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %v03)
-  %v10 = insertelement <4 x float> undef, float %f, i32 0
-  %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
-  %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
-  %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
-  %tmp1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %v13)
-  %v20 = insertelement <4 x float> undef, float %f, i32 0
-  %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
-  %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
-  %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
-  %tmp2 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %v23)
-  %v30 = insertelement <4 x float> undef, float %f, i32 0
-  %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
-  %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
-  %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
-  %tmp3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %v33)
-  %v40 = insertelement <2 x double> undef, double %d, i32 0
-  %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
-  %tmp4 = tail call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %v41)
-  %v50 = insertelement <2 x double> undef, double %d, i32 0
-  %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
-  %tmp5 = tail call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %v51)
-  %v60 = insertelement <2 x double> undef, double %d, i32 0
-  %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
-  %tmp6 = tail call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %v61)
-  %v70 = insertelement <2 x double> undef, double %d, i32 0
-  %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
-  %tmp7 = tail call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %v71)
-  %tmp8 = add i32 %tmp0, %tmp2
-  %tmp9 = add i32 %tmp4, %tmp6
-  %tmp10 = add i32 %tmp8, %tmp9
-  %tmp11 = sext i32 %tmp10 to i64
-  %tmp12 = add i64 %tmp1, %tmp3
-  %tmp13 = add i64 %tmp5, %tmp7
-  %tmp14 = add i64 %tmp12, %tmp13
-  %tmp15 = add i64 %tmp11, %tmp14
-  ret i64 %tmp15
-}
-
 define void @get_image() nounwind {
 ; CHECK-LABEL: @get_image(
 ; CHECK-NEXT:  entry:
@@ -156,18 +61,6 @@ entry:
 }
 
 declare i32 @fgetc(i8*)
-declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.cvtss2si(<4 x float>)
-declare i64 @llvm.x86.sse.cvtss2si64(<4 x float>)
-declare i32 @llvm.x86.sse.cvttss2si(<4 x float>)
-declare i64 @llvm.x86.sse.cvttss2si64(<4 x float>)
-declare i32 @llvm.x86.sse2.cvtsd2si(<2 x double>)
-declare i64 @llvm.x86.sse2.cvtsd2si64(<2 x double>)
-declare i32 @llvm.x86.sse2.cvttsd2si(<2 x double>)
-declare i64 @llvm.x86.sse2.cvttsd2si64(<2 x double>)
 
 define <4 x float> @dead_shuffle_elt(<4 x float> %x, <2 x float> %y) nounwind {
 ; CHECK-LABEL: @dead_shuffle_elt(
@@ -248,4 +141,3 @@ define <2 x i64> @PR24922(<2 x i64> %v)
   %result = select <2 x i1> <i1 icmp eq (i64 extractelement (<2 x i64> bitcast (<4 x i32> <i32 15, i32 15, i32 15, i32 15> to <2 x i64>), i64 0), i64 0), i1 true>, <2 x i64> %v, <2 x i64> zeroinitializer
   ret <2 x i64> %result
 }
-

Removed: llvm/trunk/test/Transforms/InstCombine/vsx-unaligned.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/vsx-unaligned.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/vsx-unaligned.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/vsx-unaligned.ll (removed)
@@ -1,44 +0,0 @@
-; Verify that we can create unaligned loads and stores from VSX intrinsics.
-
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-target triple = "powerpc64-unknown-linux-gnu"
-
- at vf = common global <4 x float> zeroinitializer, align 1
- at res_vf = common global <4 x float> zeroinitializer, align 1
- at vd = common global <2 x double> zeroinitializer, align 1
- at res_vd = common global <2 x double> zeroinitializer, align 1
-
-define void @test1() {
-entry:
-  %t1 = alloca <4 x float>*, align 8
-  %t2 = alloca <2 x double>*, align 8
-  store <4 x float>* @vf, <4 x float>** %t1, align 8
-  %0 = load <4 x float>*, <4 x float>** %t1, align 8
-  %1 = bitcast <4 x float>* %0 to i8*
-  %2 = call <4 x i32> @llvm.ppc.vsx.lxvw4x(i8* %1)
-  store <4 x float>* @res_vf, <4 x float>** %t1, align 8
-  %3 = load <4 x float>*, <4 x float>** %t1, align 8
-  %4 = bitcast <4 x float>* %3 to i8*
-  call void @llvm.ppc.vsx.stxvw4x(<4 x i32> %2, i8* %4)
-  store <2 x double>* @vd, <2 x double>** %t2, align 8
-  %5 = load <2 x double>*, <2 x double>** %t2, align 8
-  %6 = bitcast <2 x double>* %5 to i8*
-  %7 = call <2 x double> @llvm.ppc.vsx.lxvd2x(i8* %6)
-  store <2 x double>* @res_vd, <2 x double>** %t2, align 8
-  %8 = load <2 x double>*, <2 x double>** %t2, align 8
-  %9 = bitcast <2 x double>* %8 to i8*
-  call void @llvm.ppc.vsx.stxvd2x(<2 x double> %7, i8* %9)
-  ret void
-}
-
-; CHECK-LABEL: @test1
-; CHECK: %0 = load <4 x i32>, <4 x i32>* bitcast (<4 x float>* @vf to <4 x i32>*), align 1
-; CHECK: store <4 x i32> %0, <4 x i32>* bitcast (<4 x float>* @res_vf to <4 x i32>*), align 1
-; CHECK: %1 = load <2 x double>, <2 x double>* @vd, align 1
-; CHECK: store <2 x double> %1, <2 x double>* @res_vd, align 1
-
-declare <4 x i32> @llvm.ppc.vsx.lxvw4x(i8*)
-declare void @llvm.ppc.vsx.stxvw4x(<4 x i32>, i8*)
-declare <2 x double> @llvm.ppc.vsx.lxvd2x(i8*)
-declare void @llvm.ppc.vsx.stxvd2x(<2 x double>, i8*)

Removed: llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-avx2.ll (removed)
@@ -1,109 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; Verify that instcombine is able to fold identity shuffles.
-
-define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {
-; CHECK-LABEL: @identity_test_vpermd(
-; CHECK-NEXT:    ret <8 x i32> %a0
-;
-  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
-  ret <8 x i32> %a
-}
-
-define <8 x float> @identity_test_vpermps(<8 x float> %a0) {
-; CHECK-LABEL: @identity_test_vpermps(
-; CHECK-NEXT:    ret <8 x float> %a0
-;
-  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
-  ret <8 x float> %a
-}
-
-; Instcombine should be able to fold the following shuffle to a builtin shufflevector
-; with a shuffle mask of all zeroes.
-
-define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {
-; CHECK-LABEL: @zero_test_vpermd(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
-  ret <8 x i32> %a
-}
-
-define <8 x float> @zero_test_vpermps(<8 x float> %a0) {
-; CHECK-LABEL: @zero_test_vpermps(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
-  ret <8 x float> %a
-}
-
-; Verify that instcombine is able to fold constant shuffles.
-
-define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {
-; CHECK-LABEL: @shuffle_test_vpermd(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
-  ret <8 x i32> %a
-}
-
-define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
-; CHECK-LABEL: @shuffle_test_vpermps(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
-  ret <8 x float> %a
-}
-
-; Verify that instcombine is able to fold constant shuffles with undef mask elements.
-
-define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
-; CHECK-LABEL: @undef_test_vpermd(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
-  ret <8 x i32> %a
-}
-
-define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
-; CHECK-LABEL: @undef_test_vpermps(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
-  ret <8 x float> %a
-}
-
-; Verify simplify demanded elts.
-
-define <8 x i32> @elts_test_vpermd(<8 x i32> %a0, i32 %a1) {
-; CHECK-LABEL: @elts_test_vpermd(
-; CHECK-NEXT:    ret <8 x i32> %a0
-;
-  %1 = insertelement <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, i32 %a1, i32 0
-  %2 = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> %1)
-  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  ret <8 x i32> %3
-}
-
-define <8 x float> @elts_test_vpermps(<8 x float> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @elts_test_vpermps(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %a1)
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x float> [[TMP1]], <8 x float> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x float> [[TMP2]]
-;
-  %1 = insertelement <8 x i32> %a1, i32 0, i32 7
-  %2 = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> %1)
-  %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> zeroinitializer
-  ret <8 x float> %3
-}
-
-declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
-declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)

Removed: llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-avx512.ll (removed)
@@ -1,2793 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_add_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP4]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_add_ss_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_add_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP8]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_add_ss_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
-  ret <4 x float> %4
-}
-
-define float @test_add_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_add_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
-  %10 = extractelement <4 x float> %9, i32 1
-  ret float %10
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_add_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP4]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_add_sd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_add_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP8]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_add_sd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
-  ret <2 x double> %2
-}
-
-define double @test_add_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_add_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_sub_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP4]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_sub_ss_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_sub_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP8]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_sub_ss_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
-  ret <4 x float> %4
-}
-
-define float @test_sub_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_sub_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
-  %10 = extractelement <4 x float> %9, i32 1
-  ret float %10
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_sub_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP4]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_sub_sd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_sub_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP8]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_sub_sd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
-  ret <2 x double> %2
-}
-
-define double @test_sub_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_sub_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_mul_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP4]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_mul_ss_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mul_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP8]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mul_ss_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
-  ret <4 x float> %4
-}
-
-define float @test_mul_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_mul_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
-  %10 = extractelement <4 x float> %9, i32 1
-  ret float %10
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_mul_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP4]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_mul_sd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mul_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP8]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mul_sd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
-  ret <2 x double> %2
-}
-
-define double @test_mul_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_mul_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_div_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP4]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_div_ss_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_div_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
-; CHECK-NEXT:    ret <4 x float> [[TMP8]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_div_ss_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
-  ret <4 x float> %4
-}
-
-define float @test_div_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_div_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
-  %10 = extractelement <4 x float> %9, i32 1
-  ret float %10
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_div_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP4]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_div_sd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_div_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
-; CHECK-NEXT:    [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
-; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
-; CHECK-NEXT:    [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
-; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
-; CHECK-NEXT:    ret <2 x double> [[TMP8]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_div_sd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
-  ret <2 x double> %2
-}
-
-define double @test_div_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_div_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_max_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_max_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  ret <4 x float> %4
-}
-
-define float @test_max_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_max_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
-  %10 = extractelement <4 x float> %9, i32 1
-  ret float %10
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_max_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_max_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  ret <2 x double> %2
-}
-
-define double @test_max_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_max_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-declare <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_min_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
-  ret <4 x float> %4
-}
-
-define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_min_ss_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  ret <4 x float> %4
-}
-
-define float @test_min_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_min_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
-  %10 = extractelement <4 x float> %9, i32 1
-  ret float %10
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_min_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
-  ret <2 x double> %2
-}
-
-define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_min_sd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  ret <2 x double> %2
-}
-
-define double @test_min_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_min_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
-
-define i8 @test_cmp_ss(<4 x float> %a, <4 x float> %b, i8 %mask) {
-; CHECK-LABEL: @test_cmp_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret i8 [[TMP1]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %7 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %3, <4 x float> %6, i32 3, i8 %mask, i32 4)
-  ret i8 %7
-}
-
-declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
-
-define i8 @test_cmp_sd(<2 x double> %a, <2 x double> %b, i8 %mask) {
-; CHECK-LABEL: @test_cmp_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret i8 [[TMP1]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-  %3 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %1, <2 x double> %2, i32 3, i8 %mask, i32 4)
-  ret i8 %3
-}
-
-define i64 @test(float %f, double %d) {
-; CHECK-LABEL: @test(
-; CHECK-NEXT:    [[V03:%.*]] = insertelement <4 x float> undef, float [[F:%.*]], i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[V03]], i32 4)
-; CHECK-NEXT:    [[V13:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> [[V13]], i32 4)
-; CHECK-NEXT:    [[V23:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[V23]], i32 4)
-; CHECK-NEXT:    [[V33:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> [[V33]], i32 4)
-; CHECK-NEXT:    [[V41:%.*]] = insertelement <2 x double> undef, double [[D:%.*]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[V41]], i32 4)
-; CHECK-NEXT:    [[V51:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> [[V51]], i32 4)
-; CHECK-NEXT:    [[V61:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[V61]], i32 4)
-; CHECK-NEXT:    [[V71:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> [[V71]], i32 4)
-; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
-; CHECK-NEXT:    ret i64 [[TMP15]]
-;
-  %v00 = insertelement <4 x float> undef, float %f, i32 0
-  %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
-  %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
-  %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
-  %tmp0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %v03, i32 4)
-  %v10 = insertelement <4 x float> undef, float %f, i32 0
-  %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
-  %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
-  %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
-  %tmp1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %v13, i32 4)
-  %v20 = insertelement <4 x float> undef, float %f, i32 0
-  %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
-  %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
-  %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
-  %tmp2 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %v23, i32 4)
-  %v30 = insertelement <4 x float> undef, float %f, i32 0
-  %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
-  %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
-  %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
-  %tmp3 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %v33, i32 4)
-  %v40 = insertelement <2 x double> undef, double %d, i32 0
-  %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
-  %tmp4 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %v41, i32 4)
-  %v50 = insertelement <2 x double> undef, double %d, i32 0
-  %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
-  %tmp5 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %v51, i32 4)
-  %v60 = insertelement <2 x double> undef, double %d, i32 0
-  %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
-  %tmp6 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %v61, i32 4)
-  %v70 = insertelement <2 x double> undef, double %d, i32 0
-  %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
-  %tmp7 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %v71, i32 4)
-  %tmp8 = add i32 %tmp0, %tmp2
-  %tmp9 = add i32 %tmp4, %tmp6
-  %tmp10 = add i32 %tmp8, %tmp9
-  %tmp11 = sext i32 %tmp10 to i64
-  %tmp12 = add i64 %tmp1, %tmp3
-  %tmp13 = add i64 %tmp5, %tmp7
-  %tmp14 = add i64 %tmp12, %tmp13
-  %tmp15 = add i64 %tmp11, %tmp14
-  ret i64 %tmp15
-}
-
-declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32)
-declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32)
-declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32)
-declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32)
-declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32)
-declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32)
-declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32)
-declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32)
-
-define i64 @test2(float %f, double %d) {
-; CHECK-LABEL: @test2(
-; CHECK-NEXT:    [[V03:%.*]] = insertelement <4 x float> undef, float [[F:%.*]], i32 0
-; CHECK-NEXT:    [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[V03]], i32 4)
-; CHECK-NEXT:    [[V13:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> [[V13]], i32 4)
-; CHECK-NEXT:    [[V23:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[V23]], i32 4)
-; CHECK-NEXT:    [[V33:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> [[V33]], i32 4)
-; CHECK-NEXT:    [[V41:%.*]] = insertelement <2 x double> undef, double [[D:%.*]], i32 0
-; CHECK-NEXT:    [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[V41]], i32 4)
-; CHECK-NEXT:    [[V51:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> [[V51]], i32 4)
-; CHECK-NEXT:    [[V61:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[V61]], i32 4)
-; CHECK-NEXT:    [[V71:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
-; CHECK-NEXT:    [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> [[V71]], i32 4)
-; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
-; CHECK-NEXT:    [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
-; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
-; CHECK-NEXT:    [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
-; CHECK-NEXT:    [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
-; CHECK-NEXT:    [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
-; CHECK-NEXT:    [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
-; CHECK-NEXT:    [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
-; CHECK-NEXT:    ret i64 [[TMP15]]
-;
-  %v00 = insertelement <4 x float> undef, float %f, i32 0
-  %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
-  %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
-  %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
-  %tmp0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %v03, i32 4)
-  %v10 = insertelement <4 x float> undef, float %f, i32 0
-  %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
-  %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
-  %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
-  %tmp1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %v13, i32 4)
-  %v20 = insertelement <4 x float> undef, float %f, i32 0
-  %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
-  %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
-  %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
-  %tmp2 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %v23, i32 4)
-  %v30 = insertelement <4 x float> undef, float %f, i32 0
-  %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
-  %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
-  %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
-  %tmp3 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %v33, i32 4)
-  %v40 = insertelement <2 x double> undef, double %d, i32 0
-  %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
-  %tmp4 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %v41, i32 4)
-  %v50 = insertelement <2 x double> undef, double %d, i32 0
-  %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
-  %tmp5 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %v51, i32 4)
-  %v60 = insertelement <2 x double> undef, double %d, i32 0
-  %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
-  %tmp6 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %v61, i32 4)
-  %v70 = insertelement <2 x double> undef, double %d, i32 0
-  %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
-  %tmp7 = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %v71, i32 4)
-  %tmp8 = add i32 %tmp0, %tmp2
-  %tmp9 = add i32 %tmp4, %tmp6
-  %tmp10 = add i32 %tmp8, %tmp9
-  %tmp11 = sext i32 %tmp10 to i64
-  %tmp12 = add i64 %tmp1, %tmp3
-  %tmp13 = add i64 %tmp5, %tmp7
-  %tmp14 = add i64 %tmp12, %tmp13
-  %tmp15 = add i64 %tmp11, %tmp14
-  ret i64 %tmp15
-}
-
-declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32)
-declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32)
-declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32)
-declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32)
-declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32)
-declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32)
-declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32)
-declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32)
-
-declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask_vfmadd_ss(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <4 x float> [[RES]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6, i8 %mask, i32 4)
-  ret <4 x float> %res
-}
-
-define float @test_mask_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask_vfmadd_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_mask_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask_vfmadd_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask_vfmadd_sd(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <2 x double> [[RES]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2, i8 %mask, i32 4)
-  ret <2 x double> %res
-}
-
-define double @test_mask_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask_vfmadd_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_mask_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask_vfmadd_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}
-
-declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_maskz_vfmadd_ss(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <4 x float> [[RES]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6, i8 %mask, i32 4)
-  ret <4 x float> %res
-}
-
-define float @test_maskz_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_maskz_vfmadd_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_maskz_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_maskz_vfmadd_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_maskz_vfmadd_sd(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <2 x double> [[RES]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2, i8 %mask, i32 4)
-  ret <2 x double> %res
-}
-
-define double @test_maskz_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_maskz_vfmadd_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_maskz_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_maskz_vfmadd_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}
-
-declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmadd_ss(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <4 x float> [[RES]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %3, <4 x float> %6, <4 x float> %c, i8 %mask, i32 4)
-  ret <4 x float> %res
-}
-
-define float @test_mask3_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmadd_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_mask3_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmadd_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmadd_sd(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <2 x double> [[RES]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %1, <2 x double> %2, <2 x double> %c, i8 %mask, i32 4)
-  ret <2 x double> %res
-}
-
-define double @test_mask3_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmadd_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_mask3_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmadd_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}
-
-declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmsub_ss(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <4 x float> [[RES]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %3, <4 x float> %6, <4 x float> %c, i8 %mask, i32 4)
-  ret <4 x float> %res
-}
-
-define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmsub_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_mask3_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmsub_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmsub_sd(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <2 x double> [[RES]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %1, <2 x double> %2, <2 x double> %c, i8 %mask, i32 4)
-  ret <2 x double> %res
-}
-
-define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmsub_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_mask3_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfmsub_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}
-
-declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
-
-define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfnmsub_ss(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <4 x float> [[RES]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %3, <4 x float> %6, <4 x float> %c, i8 %mask, i32 4)
-  ret <4 x float> %res
-}
-
-define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfnmsub_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_mask3_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfnmsub_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
-
-define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfnmsub_sd(
-; CHECK-NEXT:    [[RES:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    ret <2 x double> [[RES]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %1, <2 x double> %2, <2 x double> %c, i8 %mask, i32 4)
-  ret <2 x double> %res
-}
-
-define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfnmsub_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_mask3_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mask3_vfnmsub_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}
-
-declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
-
-define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) {
-; CHECK-LABEL: @identity_test_permvar_si_256(
-; CHECK-NEXT:    ret <8 x i32> [[A0:%.*]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> undef, i8 -1)
-  ret <8 x i32> %a
-}
-
-define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
-; CHECK-LABEL: @identity_test_permvar_si_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x i32> %passthru, i8 %mask)
-  ret <8 x i32> %a
-}
-
-define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) {
-; CHECK-LABEL: @zero_test_permvar_si_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> undef, i8 -1)
-  ret <8 x i32> %a
-}
-
-define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
-; CHECK-LABEL: @zero_test_permvar_si_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> zeroinitializer, <8 x i32> %passthru, i8 %mask)
-  ret <8 x i32> %a
-}
-
-define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_si_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> undef, i8 -1)
-  ret <8 x i32> %a
-}
-
-define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_si_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> %passthru, i8 %mask)
-  ret <8 x i32> %a
-}
-
-define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) {
-; CHECK-LABEL: @undef_test_permvar_si_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> undef, i8 -1)
-  ret <8 x i32> %a
-}
-
-define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
-; CHECK-LABEL: @undef_test_permvar_si_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i32> [[TMP3]]
-;
-  %a = tail call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x i32> %passthru, i8 %mask)
-  ret <8 x i32> %a
-}
-
-declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)
-
-define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) {
-; CHECK-LABEL: @identity_test_permvar_sf_256(
-; CHECK-NEXT:    ret <8 x float> [[A0:%.*]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x float> undef, i8 -1)
-  ret <8 x float> %a
-}
-
-define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
-; CHECK-LABEL: @identity_test_permvar_sf_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x float> [[TMP2]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, <8 x float> %passthru, i8 %mask)
-  ret <8 x float> %a
-}
-
-define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) {
-; CHECK-LABEL: @zero_test_permvar_sf_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> zeroinitializer, <8 x float> undef, i8 -1)
-  ret <8 x float> %a
-}
-
-define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
-; CHECK-LABEL: @zero_test_permvar_sf_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x float> [[TMP3]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> zeroinitializer, <8 x float> %passthru, i8 %mask)
-  ret <8 x float> %a
-}
-
-define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_sf_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> undef, i8 -1)
-  ret <8 x float> %a
-}
-
-define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x float> [[TMP3]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> %passthru, i8 %mask)
-  ret <8 x float> %a
-}
-
-define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) {
-; CHECK-LABEL: @undef_test_permvar_sf_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> undef, i8 -1)
-  ret <8 x float> %a
-}
-
-define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
-; CHECK-LABEL: @undef_test_permvar_sf_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x float> [[TMP3]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <8 x float> %passthru, i8 %mask)
-  ret <8 x float> %a
-}
-
-declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
-
-define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
-; CHECK-LABEL: @identity_test_permvar_di_256(
-; CHECK-NEXT:    ret <4 x i64> [[A0:%.*]]
-;
-  %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> undef, i8 -1)
-  ret <4 x i64> %a
-}
-
-define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
-; CHECK-LABEL: @identity_test_permvar_di_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <4 x i64> [[TMP2]]
-;
-  %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x i64> %passthru, i8 %mask)
-  ret <4 x i64> %a
-}
-
-define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) {
-; CHECK-LABEL: @zero_test_permvar_di_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer, <4 x i64> undef, i8 -1)
-  ret <4 x i64> %a
-}
-
-define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
-; CHECK-LABEL: @zero_test_permvar_di_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
-;
-  %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer, <4 x i64> %passthru, i8 %mask)
-  ret <4 x i64> %a
-}
-
-define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_di_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x i64> undef, i8 -1)
-  ret <4 x i64> %a
-}
-
-define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_di_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
-;
-  %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x i64> %passthru, i8 %mask)
-  ret <4 x i64> %a
-}
-
-define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) {
-; CHECK-LABEL: @undef_test_permvar_di_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x i64> undef, i8 -1)
-  ret <4 x i64> %a
-}
-
-define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
-; CHECK-LABEL: @undef_test_permvar_di_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
-;
-  %a = tail call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x i64> %passthru, i8 %mask)
-  ret <4 x i64> %a
-}
-
-declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)
-
-define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
-; CHECK-LABEL: @identity_test_permvar_df_256(
-; CHECK-NEXT:    ret <4 x double> [[A0:%.*]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x double> undef, i8 -1)
-  ret <4 x double> %a
-}
-
-define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
-; CHECK-LABEL: @identity_test_permvar_df_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <4 x double> [[TMP2]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>, <4 x double> %passthru, i8 %mask)
-  ret <4 x double> %a
-}
-
-define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) {
-; CHECK-LABEL: @zero_test_permvar_df_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer, <4 x double> undef, i8 -1)
-  ret <4 x double> %a
-}
-
-define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
-; CHECK-LABEL: @zero_test_permvar_df_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <4 x double> [[TMP3]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer, <4 x double> %passthru, i8 %mask)
-  ret <4 x double> %a
-}
-
-define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_df_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x double> undef, i8 -1)
-  ret <4 x double> %a
-}
-
-define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_df_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <4 x double> [[TMP3]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>, <4 x double> %passthru, i8 %mask)
-  ret <4 x double> %a
-}
-
-define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) {
-; CHECK-LABEL: @undef_test_permvar_df_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x double> undef, i8 -1)
-  ret <4 x double> %a
-}
-
-define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
-; CHECK-LABEL: @undef_test_permvar_df_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <4 x double> [[TMP3]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>, <4 x double> %passthru, i8 %mask)
-  ret <4 x double> %a
-}
-
-declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
-
-define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
-; CHECK-LABEL: @identity_test_permvar_si_512(
-; CHECK-NEXT:    ret <16 x i32> [[A0:%.*]]
-;
-  %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> undef, i16 -1)
-  ret <16 x i32> %a
-}
-
-define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
-; CHECK-LABEL: @identity_test_permvar_si_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
-;
-  %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x i32> %passthru, i16 %mask)
-  ret <16 x i32> %a
-}
-
-define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) {
-; CHECK-LABEL: @zero_test_permvar_si_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer, <16 x i32> undef, i16 -1)
-  ret <16 x i32> %a
-}
-
-define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
-; CHECK-LABEL: @zero_test_permvar_si_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
-;
-  %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer, <16 x i32> %passthru, i16 %mask)
-  ret <16 x i32> %a
-}
-
-define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_si_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> undef, i16 -1)
-  ret <16 x i32> %a
-}
-
-define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_si_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
-;
-  %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %passthru, i16 %mask)
-  ret <16 x i32> %a
-}
-
-define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) {
-; CHECK-LABEL: @undef_test_permvar_si_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> undef, i16 -1)
-  ret <16 x i32> %a
-}
-
-define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
-; CHECK-LABEL: @undef_test_permvar_si_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i32> [[TMP3]]
-;
-  %a = tail call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x i32> %passthru, i16 %mask)
-  ret <16 x i32> %a
-}
-
-declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
-
-define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
-; CHECK-LABEL: @identity_test_permvar_sf_512(
-; CHECK-NEXT:    ret <16 x float> [[A0:%.*]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x float> undef, i16 -1)
-  ret <16 x float> %a
-}
-
-define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
-; CHECK-LABEL: @identity_test_permvar_sf_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP2]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>, <16 x float> %passthru, i16 %mask)
-  ret <16 x float> %a
-}
-
-define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) {
-; CHECK-LABEL: @zero_test_permvar_sf_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer, <16 x float> undef, i16 -1)
-  ret <16 x float> %a
-}
-
-define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
-; CHECK-LABEL: @zero_test_permvar_sf_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer, <16 x float> %passthru, i16 %mask)
-  ret <16 x float> %a
-}
-
-define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_sf_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> undef, i16 -1)
-  ret <16 x float> %a
-}
-
-define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %passthru, i16 %mask)
-  ret <16 x float> %a
-}
-
-define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) {
-; CHECK-LABEL: @undef_test_permvar_sf_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> undef, i16 -1)
-  ret <16 x float> %a
-}
-
-define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
-; CHECK-LABEL: @undef_test_permvar_sf_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>, <16 x float> %passthru, i16 %mask)
-  ret <16 x float> %a
-}
-
-declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
-
-define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
-; CHECK-LABEL: @identity_test_permvar_di_512(
-; CHECK-NEXT:    ret <8 x i64> [[A0:%.*]]
-;
-  %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> undef, i8 -1)
-  ret <8 x i64> %a
-}
-
-define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
-; CHECK-LABEL: @identity_test_permvar_di_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i64> [[TMP2]]
-;
-  %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x i64> %passthru, i8 %mask)
-  ret <8 x i64> %a
-}
-
-define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) {
-; CHECK-LABEL: @zero_test_permvar_di_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer, <8 x i64> undef, i8 -1)
-  ret <8 x i64> %a
-}
-
-define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
-; CHECK-LABEL: @zero_test_permvar_di_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
-;
-  %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer, <8 x i64> %passthru, i8 %mask)
-  ret <8 x i64> %a
-}
-
-define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_di_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> undef, i8 -1)
-  ret <8 x i64> %a
-}
-
-define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_di_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
-;
-  %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %passthru, i8 %mask)
-  ret <8 x i64> %a
-}
-
-define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) {
-; CHECK-LABEL: @undef_test_permvar_di_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> undef, i8 -1)
-  ret <8 x i64> %a
-}
-
-define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
-; CHECK-LABEL: @undef_test_permvar_di_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
-;
-  %a = tail call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x i64> %passthru, i8 %mask)
-  ret <8 x i64> %a
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
-
-define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
-; CHECK-LABEL: @identity_test_permvar_df_512(
-; CHECK-NEXT:    ret <8 x double> [[A0:%.*]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x double> undef, i8 -1)
-  ret <8 x double> %a
-}
-
-define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
-; CHECK-LABEL: @identity_test_permvar_df_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP2]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>, <8 x double> %passthru, i8 %mask)
-  ret <8 x double> %a
-}
-
-define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) {
-; CHECK-LABEL: @zero_test_permvar_df_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer, <8 x double> undef, i8 -1)
-  ret <8 x double> %a
-}
-
-define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
-; CHECK-LABEL: @zero_test_permvar_df_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer, <8 x double> %passthru, i8 %mask)
-  ret <8 x double> %a
-}
-
-define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_df_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> undef, i8 -1)
-  ret <8 x double> %a
-}
-
-define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_df_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %passthru, i8 %mask)
-  ret <8 x double> %a
-}
-
-define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) {
-; CHECK-LABEL: @undef_test_permvar_df_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> undef, i8 -1)
-  ret <8 x double> %a
-}
-
-define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
-; CHECK-LABEL: @undef_test_permvar_df_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>, <8 x double> %passthru, i8 %mask)
-  ret <8 x double> %a
-}
-
-declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
-
-define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
-; CHECK-LABEL: @identity_test_permvar_hi_128(
-; CHECK-NEXT:    ret <8 x i16> [[A0:%.*]]
-;
-  %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, <8 x i16> undef, i8 -1)
-  ret <8 x i16> %a
-}
-
-define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
-; CHECK-LABEL: @identity_test_permvar_hi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-;
-  %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, <8 x i16> %passthru, i8 %mask)
-  ret <8 x i16> %a
-}
-
-define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) {
-; CHECK-LABEL: @zero_test_permvar_hi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i16> undef, i8 -1)
-  ret <8 x i16> %a
-}
-
-define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
-; CHECK-LABEL: @zero_test_permvar_hi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
-;
-  %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i16> %passthru, i8 %mask)
-  ret <8 x i16> %a
-}
-
-define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_hi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> undef, i8 -1)
-  ret <8 x i16> %a
-}
-
-define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
-;
-  %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> %passthru, i8 %mask)
-  ret <8 x i16> %a
-}
-
-define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) {
-; CHECK-LABEL: @undef_test_permvar_hi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> undef, i8 -1)
-  ret <8 x i16> %a
-}
-
-define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
-; CHECK-LABEL: @undef_test_permvar_hi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <8 x i16> [[TMP3]]
-;
-  %a = tail call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <8 x i16> %passthru, i8 %mask)
-  ret <8 x i16> %a
-}
-
-declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
-
-define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
-; CHECK-LABEL: @identity_test_permvar_hi_256(
-; CHECK-NEXT:    ret <16 x i16> [[A0:%.*]]
-;
-  %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, <16 x i16> undef, i16 -1)
-  ret <16 x i16> %a
-}
-
-define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
-; CHECK-LABEL: @identity_test_permvar_hi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
-;
-  %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, <16 x i16> %passthru, i16 %mask)
-  ret <16 x i16> %a
-}
-
-define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) {
-; CHECK-LABEL: @zero_test_permvar_hi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer, <16 x i16> undef, i16 -1)
-  ret <16 x i16> %a
-}
-
-define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
-; CHECK-LABEL: @zero_test_permvar_hi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
-;
-  %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer, <16 x i16> %passthru, i16 %mask)
-  ret <16 x i16> %a
-}
-
-define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_hi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> undef, i16 -1)
-  ret <16 x i16> %a
-}
-
-define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
-;
-  %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %passthru, i16 %mask)
-  ret <16 x i16> %a
-}
-
-define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) {
-; CHECK-LABEL: @undef_test_permvar_hi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> undef, i16 -1)
-  ret <16 x i16> %a
-}
-
-define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
-; CHECK-LABEL: @undef_test_permvar_hi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
-;
-  %a = tail call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <16 x i16> %passthru, i16 %mask)
-  ret <16 x i16> %a
-}
-
-declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
-
-define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
-; CHECK-LABEL: @identity_test_permvar_hi_512(
-; CHECK-NEXT:    ret <32 x i16> [[A0:%.*]]
-;
-  %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, <32 x i16> undef, i32 -1)
-  ret <32 x i16> %a
-}
-
-define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
-; CHECK-LABEL: @identity_test_permvar_hi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
-;
-  %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>, <32 x i16> %passthru, i32 %mask)
-  ret <32 x i16> %a
-}
-
-define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) {
-; CHECK-LABEL: @zero_test_permvar_hi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer, <32 x i16> undef, i32 -1)
-  ret <32 x i16> %a
-}
-
-define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
-; CHECK-LABEL: @zero_test_permvar_hi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
-;
-  %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer, <32 x i16> %passthru, i32 %mask)
-  ret <32 x i16> %a
-}
-
-define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_hi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> undef, i32 -1)
-  ret <32 x i16> %a
-}
-
-define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
-;
-  %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %passthru, i32 %mask)
-  ret <32 x i16> %a
-}
-
-define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) {
-; CHECK-LABEL: @undef_test_permvar_hi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> undef, i32 -1)
-  ret <32 x i16> %a
-}
-
-define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
-; CHECK-LABEL: @undef_test_permvar_hi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
-;
-  %a = tail call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, <32 x i16> %passthru, i32 %mask)
-  ret <32 x i16> %a
-}
-
-declare <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
-
-define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
-; CHECK-LABEL: @identity_test_permvar_qi_128(
-; CHECK-NEXT:    ret <16 x i8> [[A0:%.*]]
-;
-  %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> undef, i16 -1)
-  ret <16 x i8> %a
-}
-
-define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
-; CHECK-LABEL: @identity_test_permvar_qi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-;
-  %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>, <16 x i8> %passthru, i16 %mask)
-  ret <16 x i8> %a
-}
-
-define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) {
-; CHECK-LABEL: @zero_test_permvar_qi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i8> undef, i16 -1)
-  ret <16 x i8> %a
-}
-
-define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
-; CHECK-LABEL: @zero_test_permvar_qi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
-;
-  %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i8> %passthru, i16 %mask)
-  ret <16 x i8> %a
-}
-
-define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_qi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> undef, i16 -1)
-  ret <16 x i8> %a
-}
-
-define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
-;
-  %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %passthru, i16 %mask)
-  ret <16 x i8> %a
-}
-
-define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) {
-; CHECK-LABEL: @undef_test_permvar_qi_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> undef, i16 -1)
-  ret <16 x i8> %a
-}
-
-define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
-; CHECK-LABEL: @undef_test_permvar_qi_128_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <16 x i8> [[TMP3]]
-;
-  %a = tail call <16 x i8> @llvm.x86.avx512.mask.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <16 x i8> %passthru, i16 %mask)
-  ret <16 x i8> %a
-}
-
-declare <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
-
-define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
-; CHECK-LABEL: @identity_test_permvar_qi_256(
-; CHECK-NEXT:    ret <32 x i8> [[A0:%.*]]
-;
-  %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, <32 x i8> undef, i32 -1)
-  ret <32 x i8> %a
-}
-
-define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
-; CHECK-LABEL: @identity_test_permvar_qi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <32 x i8> [[TMP2]]
-;
-  %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>, <32 x i8> %passthru, i32 %mask)
-  ret <32 x i8> %a
-}
-
-define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) {
-; CHECK-LABEL: @zero_test_permvar_qi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i8> undef, i32 -1)
-  ret <32 x i8> %a
-}
-
-define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
-; CHECK-LABEL: @zero_test_permvar_qi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
-;
-  %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer, <32 x i8> %passthru, i32 %mask)
-  ret <32 x i8> %a
-}
-
-define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_qi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> undef, i32 -1)
-  ret <32 x i8> %a
-}
-
-define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
-;
-  %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> %passthru, i32 %mask)
-  ret <32 x i8> %a
-}
-
-define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) {
-; CHECK-LABEL: @undef_test_permvar_qi_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> undef, i32 -1)
-  ret <32 x i8> %a
-}
-
-define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
-; CHECK-LABEL: @undef_test_permvar_qi_256_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <32 x i8> [[TMP3]]
-;
-  %a = tail call <32 x i8> @llvm.x86.avx512.mask.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <32 x i8> %passthru, i32 %mask)
-  ret <32 x i8> %a
-}
-
-declare <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
-
-define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
-; CHECK-LABEL: @identity_test_permvar_qi_512(
-; CHECK-NEXT:    ret <64 x i8> [[A0:%.*]]
-;
-  %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>, <64 x i8> undef, i64 -1)
-  ret <64 x i8> %a
-}
-
-define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
-; CHECK-LABEL: @identity_test_permvar_qi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT:    [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <64 x i8> [[TMP2]]
-;
-  %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>, <64 x i8> %passthru, i64 %mask)
-  ret <64 x i8> %a
-}
-
-define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) {
-; CHECK-LABEL: @zero_test_permvar_qi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer, <64 x i8> undef, i64 -1)
-  ret <64 x i8> %a
-}
-
-define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
-; CHECK-LABEL: @zero_test_permvar_qi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
-;
-  %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer, <64 x i8> %passthru, i64 %mask)
-  ret <64 x i8> %a
-}
-
-define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) {
-; CHECK-LABEL: @shuffle_test_permvar_qi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> undef, i64 -1)
-  ret <64 x i8> %a
-}
-
-define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
-; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
-;
-  %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> %passthru, i64 %mask)
-  ret <64 x i8> %a
-}
-
-define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) {
-; CHECK-LABEL: @undef_test_permvar_qi_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> undef, i64 -1)
-  ret <64 x i8> %a
-}
-
-define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
-; CHECK-LABEL: @undef_test_permvar_qi_512_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
-; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
-;
-  %a = tail call <64 x i8> @llvm.x86.avx512.mask.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>, <64 x i8> %passthru, i64 %mask)
-  ret <64 x i8> %a
-}
-
-declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-
-define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) {
-; CHECK-LABEL: @test_add_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_add_ps_round(<16 x float> %a, <16 x float> %b) {
-; CHECK-LABEL: @test_add_ps_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_add_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
-; CHECK-LABEL: @test_add_ps_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_add_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
-; CHECK-LABEL: @test_add_ps_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
-  ret <16 x float> %1
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-
-define <8 x double> @test_add_pd(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @test_add_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_add_pd_round(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @test_add_pd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_add_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_add_pd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_add_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_add_pd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.add.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
-  ret <8 x double> %1
-}
-
-declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-
-define <16 x float> @test_sub_ps(<16 x float> %a, <16 x float> %b) {
-; CHECK-LABEL: @test_sub_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_sub_ps_round(<16 x float> %a, <16 x float> %b) {
-; CHECK-LABEL: @test_sub_ps_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_sub_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
-; CHECK-LABEL: @test_sub_ps_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_sub_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
-; CHECK-LABEL: @test_sub_ps_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
-  ret <16 x float> %1
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-
-define <8 x double> @test_sub_pd(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @test_sub_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_sub_pd_round(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @test_sub_pd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_sub_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_sub_pd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_sub_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_sub_pd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.sub.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
-  ret <8 x double> %1
-}
-
-declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-
-define <16 x float> @test_mul_ps(<16 x float> %a, <16 x float> %b) {
-; CHECK-LABEL: @test_mul_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_mul_ps_round(<16 x float> %a, <16 x float> %b) {
-; CHECK-LABEL: @test_mul_ps_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_mul_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
-; CHECK-LABEL: @test_mul_ps_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_mul_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
-; CHECK-LABEL: @test_mul_ps_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
-  ret <16 x float> %1
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-
-define <8 x double> @test_mul_pd(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @test_mul_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_mul_pd_round(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @test_mul_pd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_mul_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mul_pd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_mul_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_mul_pd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
-  ret <8 x double> %1
-}
-
-declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
-
-define <16 x float> @test_div_ps(<16 x float> %a, <16 x float> %b) {
-; CHECK-LABEL: @test_div_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_div_ps_round(<16 x float> %a, <16 x float> %b) {
-; CHECK-LABEL: @test_div_ps_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> undef, i16 -1, i32 8)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> undef, i16 -1, i32 8)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_div_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
-; CHECK-LABEL: @test_div_ps_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
-; CHECK-NEXT:    ret <16 x float> [[TMP3]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 4)
-  ret <16 x float> %1
-}
-
-define <16 x float> @test_div_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
-; CHECK-LABEL: @test_div_ps_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], <16 x float> [[C:%.*]], i16 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = tail call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask, i32 8)
-  ret <16 x float> %1
-}
-
-declare <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
-
-define <8 x double> @test_div_pd(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @test_div_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_div_pd_round(<8 x double> %a, <8 x double> %b) {
-; CHECK-LABEL: @test_div_pd_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> undef, i8 -1, i32 8)
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> undef, i8 -1, i32 8)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_div_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_div_pd_mask(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
-; CHECK-NEXT:    [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 4)
-  ret <8 x double> %1
-}
-
-define <8 x double> @test_div_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
-; CHECK-LABEL: @test_div_pd_mask_round(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], <8 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %1 = tail call <8 x double> @llvm.x86.avx512.mask.div.pd.512(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask, i32 8)
-  ret <8 x double> %1
-}
-
-declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
-
-define i32 @test_comi_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comi_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i32 0, i32 4)
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %4, <4 x float> %8, i32 0, i32 4)
-  ret i32 %9
-}
-
-declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
-
-define i32 @test_comi_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comi_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 0, i32 4)
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %2, <2 x double> %4, i32 0, i32 4)
-  ret i32 %5
-}

Removed: llvm/trunk/test/Transforms/InstCombine/x86-crc32-demanded.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-crc32-demanded.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-crc32-demanded.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-crc32-demanded.ll (removed)
@@ -1,17 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; crc32 with 64-bit destination zeros high 32-bit.
-; rdar://9467055
-
-define i64 @test() nounwind {
-entry:
-; CHECK: test
-; CHECK: tail call i64 @llvm.x86.sse42.crc32.64.64
-; CHECK-NOT: and
-; CHECK: ret
-  %0 = tail call i64 @llvm.x86.sse42.crc32.64.64(i64 0, i64 4) nounwind
-  %1 = and i64 %0, 4294967295
-  ret i64 %1
-}
-
-declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind readnone

Removed: llvm/trunk/test/Transforms/InstCombine/x86-f16c.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-f16c.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-f16c.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-f16c.ll (removed)
@@ -1,68 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
-declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
-
-;
-; Vector Demanded Bits
-;
-
-; Only bottom 4 elements required.
-define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
-; CHECK-LABEL: @demand_vcvtph2ps_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
-  ret <4 x float> %2
-}
-
-; All 8 elements required.
-define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
-; CHECK-LABEL: @demand_vcvtph2ps_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> [[TMP1]])
-; CHECK-NEXT:    ret <8 x float> [[TMP2]]
-;
-  %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
-  ret <8 x float> %2
-}
-
-;
-; Constant Folding
-;
-
-define <4 x float> @fold_vcvtph2ps_128() {
-; CHECK-LABEL: @fold_vcvtph2ps_128(
-; CHECK-NEXT:    ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
-;
-  %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
-  ret <4 x float> %1
-}
-
-define <8 x float> @fold_vcvtph2ps_256() {
-; CHECK-LABEL: @fold_vcvtph2ps_256(
-; CHECK-NEXT:    ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
-;
-  %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
-  ret <8 x float> %1
-}
-
-define <4 x float> @fold_vcvtph2ps_128_zero() {
-; CHECK-LABEL: @fold_vcvtph2ps_128_zero(
-; CHECK-NEXT:    ret <4 x float> zeroinitializer
-;
-  %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
-  ret <4 x float> %1
-}
-
-define <8 x float> @fold_vcvtph2ps_256_zero() {
-; CHECK-LABEL: @fold_vcvtph2ps_256_zero(
-; CHECK-NEXT:    ret <8 x float> zeroinitializer
-;
-  %1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
-  ret <8 x float> %1
-}

Removed: llvm/trunk/test/Transforms/InstCombine/x86-fma.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-fma.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-fma.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-fma.ll (removed)
@@ -1,315 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-declare <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>)
-
-define <4 x float> @test_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfmadd_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
-  ret <4 x float> %res
-}
-
-define float @test_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfmadd_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfmadd_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.fma.vfmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>)
-
-define <2 x double> @test_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfmadd_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
-  ret <2 x double> %res
-}
-
-define double @test_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfmadd_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfmadd_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.fma.vfmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}
-
-declare <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>)
-
-define <4 x float> @test_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfmsub_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
-  ret <4 x float> %res
-}
-
-define float @test_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfmsub_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfmsub_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.fma.vfmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>)
-
-define <2 x double> @test_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfmsub_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
-  ret <2 x double> %res
-}
-
-define double @test_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfmsub_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfmsub_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.fma.vfmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}
-
-declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float>, <4 x float>, <4 x float>)
-
-define <4 x float> @test_vfnmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfnmadd_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
-  ret <4 x float> %res
-}
-
-define float @test_vfnmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfnmadd_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_vfnmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfnmadd_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double>, <2 x double>, <2 x double>)
-
-define <2 x double> @test_vfnmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfnmadd_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
-  ret <2 x double> %res
-}
-
-define double @test_vfnmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfnmadd_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_vfnmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfnmadd_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.fma.vfnmadd.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}
-
-declare <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>)
-
-define <4 x float> @test_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfnmsub_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
-  %res = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %3, <4 x float> %6)
-  ret <4 x float> %res
-}
-
-define float @test_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfnmsub_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
-; CHECK-NEXT:    ret float [[TMP2]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
-  %5 = extractelement <4 x float> %4, i32 0
-  ret float %5
-}
-
-define float @test_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
-; CHECK-LABEL: @test_vfnmsub_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.fma.vfnmsub.ss(<4 x float> %3, <4 x float> %b, <4 x float> %c)
-  %5 = extractelement <4 x float> %4, i32 1
-  ret float %5
-}
-
-declare <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>)
-
-define <2 x double> @test_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfnmsub_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
-  %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
-  %res = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %1, <2 x double> %2)
-  ret <2 x double> %res
-}
-
-define double @test_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfnmsub_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
-; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <2 x double> [[TMP1]], i32 0
-; CHECK-NEXT:    ret double [[TMP2]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
-  %3 = extractelement <2 x double> %2, i32 0
-  ret double %3
-}
-
-define double @test_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
-; CHECK-LABEL: @test_vfnmsub_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.fma.vfnmsub.sd(<2 x double> %1, <2 x double> %b, <2 x double> %c)
-  %3 = extractelement <2 x double> %2, i32 1
-  ret double %3
-}

Removed: llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-insertps.ll (removed)
@@ -1,166 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8) nounwind readnone
-
-; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
-
-define <4 x float> @insertps_non_const_imm(<4 x float> %v1, <4 x float> %v2, i8 %c) {
-; CHECK-LABEL: @insertps_non_const_imm(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
-; CHECK-NEXT:    ret <4 x float> [[RES]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 %c)
-  ret <4 x float> %res
-
-}
-
-; If all zero mask bits are set, return a zero regardless of the other control bits.
-
-define <4 x float> @insertps_0x0f(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0x0f(
-; CHECK-NEXT:    ret <4 x float> zeroinitializer
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 15)
-  ret <4 x float> %res
-
-}
-define <4 x float> @insertps_0xff(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0xff(
-; CHECK-NEXT:    ret <4 x float> zeroinitializer
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 255)
-  ret <4 x float> %res
-
-}
-
-; If some zero mask bits are set that do not override the insertion, we do not change anything.
-
-define <4 x float> @insertps_0x0c(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0x0c(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
-; CHECK-NEXT:    ret <4 x float> [[RES]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 12)
-  ret <4 x float> %res
-
-}
-
-; ...unless both input vectors are the same operand.
-
-define <4 x float> @insertps_0x15_single_input(<4 x float> %v1) {
-; CHECK-LABEL: @insertps_0x15_single_input(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float 0.000000e+00, float undef, float 0.000000e+00, float undef>, <4 x i32> <i32 4, i32 0, i32 6, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 21)
-  ret <4 x float> %res
-
-}
-
-; The zero mask overrides the insertion lane.
-
-define <4 x float> @insertps_0x1a_single_input(<4 x float> %v1) {
-; CHECK-LABEL: @insertps_0x1a_single_input(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v1, i8 26)
-  ret <4 x float> %res
-
-}
-
-; The zero mask overrides the insertion lane, so the second input vector is not used.
-
-define <4 x float> @insertps_0xc1(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0xc1(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> %v1, float 0.000000e+00, i32 0
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 193)
-  ret <4 x float> %res
-
-}
-
-; If no zero mask bits are set, convert to a shuffle.
-
-define <4 x float> @insertps_0x00(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0x00(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 0)
-  ret <4 x float> %res
-
-}
-
-define <4 x float> @insertps_0x10(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0x10(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 16)
-  ret <4 x float> %res
-
-}
-
-define <4 x float> @insertps_0x20(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0x20(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 4, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 32)
-  ret <4 x float> %res
-
-}
-
-define <4 x float> @insertps_0x30(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0x30(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 48)
-  ret <4 x float> %res
-
-}
-
-define <4 x float> @insertps_0xc0(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0xc0(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 7, i32 1, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 192)
-  ret <4 x float> %res
-
-}
-
-define <4 x float> @insertps_0xd0(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0xd0(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 7, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 208)
-  ret <4 x float> %res
-
-}
-
-define <4 x float> @insertps_0xe0(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0xe0(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 7, i32 3>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 224)
-  ret <4 x float> %res
-
-}
-
-define <4 x float> @insertps_0xf0(<4 x float> %v1, <4 x float> %v2) {
-; CHECK-LABEL: @insertps_0xf0(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v1, <4 x float> %v2, <4 x i32> <i32 0, i32 1, i32 2, i32 7>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %res = call <4 x float> @llvm.x86.sse41.insertps(<4 x float> %v1, <4 x float> %v2, i8 240)
-  ret <4 x float> %res
-
-}
-

Removed: llvm/trunk/test/Transforms/InstCombine/x86-masked-memops.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-masked-memops.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-masked-memops.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-masked-memops.ll (removed)
@@ -1,328 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-;; MASKED LOADS
-
-; If the mask isn't constant, do nothing.
-
-define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
-; CHECK-LABEL: @mload(
-; CHECK-NEXT:    [[LD:%.*]] = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
-; CHECK-NEXT:    ret <4 x float> [[LD]]
-;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
-  ret <4 x float> %ld
-
-}
-
-; Zero mask returns a zero vector.
-
-define <4 x float> @mload_zeros(i8* %f) {
-; CHECK-LABEL: @mload_zeros(
-; CHECK-NEXT:    ret <4 x float> zeroinitializer
-;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> zeroinitializer)
-  ret <4 x float> %ld
-
-}
-
-; Only the sign bit matters.
-
-define <4 x float> @mload_fake_ones(i8* %f) {
-; CHECK-LABEL: @mload_fake_ones(
-; CHECK-NEXT:    ret <4 x float> zeroinitializer
-;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 1, i32 2, i32 3, i32 2147483647>)
-  ret <4 x float> %ld
-
-}
-
-; All mask bits are set, so this is just a vector load.
-
-define <4 x float> @mload_real_ones(i8* %f) {
-; CHECK-LABEL: @mload_real_ones(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x float>*
-; CHECK-NEXT:    [[UNMASKEDLOAD:%.*]] = load <4 x float>, <4 x float>* [[CASTVEC]], align 1
-; CHECK-NEXT:    ret <4 x float> [[UNMASKEDLOAD]]
-;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 -1, i32 -2, i32 -3, i32 2147483648>)
-  ret <4 x float> %ld
-
-}
-
-; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
-
-define <4 x float> @mload_one_one(i8* %f) {
-; CHECK-LABEL: @mload_one_one(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> zeroinitializer)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
-  ret <4 x float> %ld
-
-}
-
-; Try doubles.
-
-define <2 x double> @mload_one_one_double(i8* %f) {
-; CHECK-LABEL: @mload_one_one_double(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <2 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> zeroinitializer)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %ld = tail call <2 x double> @llvm.x86.avx.maskload.pd(i8* %f, <2 x i64> <i64 -1, i64 0>)
-  ret <2 x double> %ld
-
-}
-
-; Try 256-bit FP ops.
-
-define <8 x float> @mload_v8f32(i8* %f) {
-; CHECK-LABEL: @mload_v8f32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <8 x float>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> zeroinitializer)
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %f, <8 x i32> <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0>)
-  ret <8 x float> %ld
-
-}
-
-define <4 x double> @mload_v4f64(i8* %f) {
-; CHECK-LABEL: @mload_v4f64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x double>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> zeroinitializer)
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %ld = tail call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 0, i64 0>)
-  ret <4 x double> %ld
-
-}
-
-; Try the AVX2 variants.
-
-define <4 x i32> @mload_v4i32(i8* %f) {
-; CHECK-LABEL: @mload_v4i32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %ld = tail call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
-  ret <4 x i32> %ld
-
-}
-
-define <2 x i64> @mload_v2i64(i8* %f) {
-; CHECK-LABEL: @mload_v2i64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <2 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> zeroinitializer)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %ld = tail call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %f, <2 x i64> <i64 -1, i64 0>)
-  ret <2 x i64> %ld
-
-}
-
-define <8 x i32> @mload_v8i32(i8* %f) {
-; CHECK-LABEL: @mload_v8i32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <8 x i32>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> zeroinitializer)
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %ld = tail call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %f, <8 x i32> <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0>)
-  ret <8 x i32> %ld
-
-}
-
-define <4 x i64> @mload_v4i64(i8* %f) {
-; CHECK-LABEL: @mload_v4i64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x i64>*
-; CHECK-NEXT:    [[TMP1:%.*]] = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> zeroinitializer)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 0, i64 0>)
-  ret <4 x i64> %ld
-
-}
-
-
-;; MASKED STORES
-
-; If the mask isn't constant, do nothing.
-
-define void @mstore(i8* %f, <4 x i32> %mask, <4 x float> %v) {
-; CHECK-LABEL: @mstore(
-; CHECK-NEXT:    tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
-  ret void
-
-}
-
-; Zero mask is a nop.
-
-define void @mstore_zeros(i8* %f, <4 x float> %v)  {
-; CHECK-LABEL: @mstore_zeros(
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> zeroinitializer, <4 x float> %v)
-  ret void
-
-}
-
-; Only the sign bit matters.
-
-define void @mstore_fake_ones(i8* %f, <4 x float> %v) {
-; CHECK-LABEL: @mstore_fake_ones(
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 1, i32 2, i32 3, i32 2147483647>, <4 x float> %v)
-  ret void
-
-}
-
-; All mask bits are set, so this is just a vector store.
-
-define void @mstore_real_ones(i8* %f, <4 x float> %v) {
-; CHECK-LABEL: @mstore_real_ones(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x float>*
-; CHECK-NEXT:    store <4 x float> %v, <4 x float>* [[CASTVEC]], align 1
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -2147483648>, <4 x float> %v)
-  ret void
-
-}
-
-; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
-
-define void @mstore_one_one(i8* %f, <4 x float> %v) {
-; CHECK-LABEL: @mstore_one_one(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x float>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v, <4 x float>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>, <4 x float> %v)
-  ret void
-
-}
-
-; Try doubles.
-
-define void @mstore_one_one_double(i8* %f, <2 x double> %v) {
-; CHECK-LABEL: @mstore_one_one_double(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <2 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %v, <2 x double>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx.maskstore.pd(i8* %f, <2 x i64> <i64 -1, i64 0>, <2 x double> %v)
-  ret void
-
-}
-
-; Try 256-bit FP ops.
-
-define void @mstore_v8f32(i8* %f, <8 x float> %v) {
-; CHECK-LABEL: @mstore_v8f32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <8 x float>*
-; CHECK-NEXT:    call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %v, <8 x float>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx.maskstore.ps.256(i8* %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 -1, i32 -2, i32 -3, i32 -4>, <8 x float> %v)
-  ret void
-
-}
-
-define void @mstore_v4f64(i8* %f, <4 x double> %v) {
-; CHECK-LABEL: @mstore_v4f64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x double>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %v, <4 x double>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx.maskstore.pd.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 1, i64 2>, <4 x double> %v)
-  ret void
-
-}
-
-; Try the AVX2 variants.
-
-define void @mstore_v4i32(i8* %f, <4 x i32> %v) {
-; CHECK-LABEL: @mstore_v4i32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v, <4 x i32>* [[CASTVEC]], i32 1, <4 x i1> <i1 false, i1 false, i1 true, i1 true>)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx2.maskstore.d(i8* %f, <4 x i32> <i32 0, i32 1, i32 -1, i32 -2>, <4 x i32> %v)
-  ret void
-
-}
-
-define void @mstore_v2i64(i8* %f, <2 x i64> %v) {
-; CHECK-LABEL: @mstore_v2i64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <2 x i64>*
-; CHECK-NEXT:    call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %v, <2 x i64>* [[CASTVEC]], i32 1, <2 x i1> <i1 true, i1 false>)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx2.maskstore.q(i8* %f, <2 x i64> <i64 -1, i64 0>, <2 x i64> %v)
-  ret void
-
-}
-
-define void @mstore_v8i32(i8* %f, <8 x i32> %v) {
-; CHECK-LABEL: @mstore_v8i32(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <8 x i32>*
-; CHECK-NEXT:    call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %v, <8 x i32>* [[CASTVEC]], i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx2.maskstore.d.256(i8* %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 -1, i32 -2, i32 -3, i32 -4>, <8 x i32> %v)
-  ret void
-
-}
-
-define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
-; CHECK-LABEL: @mstore_v4i64(
-; CHECK-NEXT:    [[CASTVEC:%.*]] = bitcast i8* %f to <4 x i64>*
-; CHECK-NEXT:    call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %v, <4 x i64>* [[CASTVEC]], i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.avx2.maskstore.q.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 1, i64 2>, <4 x i64> %v)
-  ret void
-
-}
-
-; The original SSE2 masked store variant.
-
-define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
-; CHECK-LABEL: @mstore_v16i8_sse2_zeros(
-; CHECK-NEXT:    ret void
-;
-  tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
-  ret void
-
-}
-
-
-declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
-declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
-declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)
-declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>)
-
-declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>)
-declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>)
-declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>)
-declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>)
-
-declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>)
-declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>)
-declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>)
-declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>)
-
-declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>)
-declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>)
-declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>)
-declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>)
-
-declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*)
-

Removed: llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-movmsk.ll (removed)
@@ -1,324 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-;
-; DemandedBits - MOVMSK zeros the upper bits of the result.
-;
-
-define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
-; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
-  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
-  %2 = and i32 %1, 255
-  ret i32 %2
-}
-
-define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
-; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
-  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
-  %2 = and i32 %1, 15
-  ret i32 %2
-}
-
-define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
-; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
-  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
-  %2 = and i32 %1, 3
-  ret i32 %2
-}
-
-define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
-; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
-  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
-  %2 = and i32 %1, 65535
-  ret i32 %2
-}
-
-define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
-; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
-  %2 = and i32 %1, 255
-  ret i32 %2
-}
-
-define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
-; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
-  %2 = and i32 %1, 15
-  ret i32 %2
-}
-
-; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
-
-;
-; DemandedBits - If we don't use the lower bits then we just return zero.
-;
-
-define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
-; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
-  %2 = and i32 %1, -256
-  ret i32 %2
-}
-
-define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
-; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
-  %2 = and i32 %1, -16
-  ret i32 %2
-}
-
-define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
-; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
-  %2 = and i32 %1, -4
-  ret i32 %2
-}
-
-define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
-; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
-  %2 = and i32 %1, -65536
-  ret i32 %2
-}
-
-define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
-; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
-  %2 = and i32 %1, -256
-  ret i32 %2
-}
-
-define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
-; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
-  %2 = and i32 %1, -16
-  ret i32 %2
-}
-
-; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
-
-;
-; Constant Folding (UNDEF -> ZERO)
-;
-
-define i32 @undef_x86_mmx_pmovmskb() {
-; CHECK-LABEL: @undef_x86_mmx_pmovmskb(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef)
-  ret i32 %1
-}
-
-define i32 @undef_x86_sse_movmsk_ps() {
-; CHECK-LABEL: @undef_x86_sse_movmsk_ps(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef)
-  ret i32 %1
-}
-
-define i32 @undef_x86_sse2_movmsk_pd() {
-; CHECK-LABEL: @undef_x86_sse2_movmsk_pd(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef)
-  ret i32 %1
-}
-
-define i32 @undef_x86_sse2_pmovmskb_128() {
-; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef)
-  ret i32 %1
-}
-
-define i32 @undef_x86_avx_movmsk_ps_256() {
-; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef)
-  ret i32 %1
-}
-
-define i32 @undef_x86_avx_movmsk_pd_256() {
-; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef)
-  ret i32 %1
-}
-
-define i32 @undef_x86_avx2_pmovmskb() {
-; CHECK-LABEL: @undef_x86_avx2_pmovmskb(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef)
-  ret i32 %1
-}
-
-;
-; Constant Folding (ZERO -> ZERO)
-;
-
-define i32 @zero_x86_mmx_pmovmskb() {
-; CHECK-LABEL: @zero_x86_mmx_pmovmskb(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<1 x i64> zeroinitializer to x86_mmx))
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
-  %1 = bitcast <1 x i64> zeroinitializer to x86_mmx
-  %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
-  ret i32 %2
-}
-
-define i32 @zero_x86_sse_movmsk_ps() {
-; CHECK-LABEL: @zero_x86_sse_movmsk_ps(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer)
-  ret i32 %1
-}
-
-define i32 @zero_x86_sse2_movmsk_pd() {
-; CHECK-LABEL: @zero_x86_sse2_movmsk_pd(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer)
-  ret i32 %1
-}
-
-define i32 @zero_x86_sse2_pmovmskb_128() {
-; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer)
-  ret i32 %1
-}
-
-define i32 @zero_x86_avx_movmsk_ps_256() {
-; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer)
-  ret i32 %1
-}
-
-define i32 @zero_x86_avx_movmsk_pd_256() {
-; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer)
-  ret i32 %1
-}
-
-define i32 @zero_x86_avx2_pmovmskb() {
-; CHECK-LABEL: @zero_x86_avx2_pmovmskb(
-; CHECK-NEXT:    ret i32 0
-;
-  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer)
-  ret i32 %1
-}
-
-;
-; Constant Folding
-;
-
-define i32 @fold_x86_mmx_pmovmskb() {
-; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx))
-; CHECK-NEXT:    ret i32 [[TMP1]]
-;
-  %1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to x86_mmx
-  %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
-  ret i32 %2
-}
-
-define i32 @fold_x86_sse_movmsk_ps() {
-; CHECK-LABEL: @fold_x86_sse_movmsk_ps(
-; CHECK-NEXT:    ret i32 10
-;
-  %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.0, float -1.0, float 100.0, float -200.0>)
-  ret i32 %1
-}
-
-define i32 @fold_x86_sse2_movmsk_pd() {
-; CHECK-LABEL: @fold_x86_sse2_movmsk_pd(
-; CHECK-NEXT:    ret i32 2
-;
-  %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.0, double -1.0>)
-  ret i32 %1
-}
-
-define i32 @fold_x86_sse2_pmovmskb_128() {
-; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128(
-; CHECK-NEXT:    ret i32 5654
-;
-  %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
-  ret i32 %1
-}
-
-define i32 @fold_x86_avx_movmsk_ps_256() {
-; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256(
-; CHECK-NEXT:    ret i32 170
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.0, float -1.0, float 100.0, float -200.0, float +0.0, float -0.0, float 100000.0, float -5000000.0>)
-  ret i32 %1
-}
-
-define i32 @fold_x86_avx_movmsk_pd_256() {
-; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256(
-; CHECK-NEXT:    ret i32 10
-;
-  %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.0, double -1.0, double 100.0, double -200.0>)
-  ret i32 %1
-}
-
-define i32 @fold_x86_avx2_pmovmskb() {
-; CHECK-LABEL: @fold_x86_avx2_pmovmskb(
-; CHECK-NEXT:    ret i32 370546176
-;
-  %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
-  ret i32 %1
-}
-
-declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)
-
-declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
-declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
-declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
-
-declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
-declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
-declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)

Removed: llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-muldq.ll (removed)
@@ -1,245 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-;
-; UNDEF Elts
-;
-
-define <2 x i64> @undef_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @undef_pmuludq_128(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> undef)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @undef_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @undef_pmuludq_256(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> undef, <8 x i32> undef)
-  ret <4 x i64> %1
-}
-
-define <8 x i64> @undef_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @undef_pmuludq_512(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> undef)
-  ret <8 x i64> %1
-}
-
-define <2 x i64> @undef_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @undef_pmuldq_128(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> undef, <4 x i32> undef)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @undef_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @undef_pmuldq_256(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> undef)
-  ret <4 x i64> %1
-}
-
-define <8 x i64> @undef_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @undef_pmuldq_512(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> undef, <16 x i32> undef)
-  ret <8 x i64> %1
-}
-
-define <2 x i64> @undef_zero_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @undef_zero_pmuludq_128(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> undef, <4 x i32> zeroinitializer)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @undef_zero_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @undef_zero_pmuludq_256(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> undef)
-  ret <4 x i64> %1
-}
-
-define <8 x i64> @undef_zero_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @undef_zero_pmuludq_512(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> undef, <16 x i32> zeroinitializer)
-  ret <8 x i64> %1
-}
-
-define <2 x i64> @undef_zero_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @undef_zero_pmuldq_128(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> zeroinitializer, <4 x i32> undef)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @undef_zero_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @undef_zero_pmuldq_256(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> undef, <8 x i32> zeroinitializer)
-  ret <4 x i64> %1
-}
-
-define <8 x i64> @undef_zero_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @undef_zero_pmuldq_512(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> undef)
-  ret <8 x i64> %1
-}
-
-;
-; Constant Folding
-;
-
-define <2 x i64> @fold_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @fold_pmuludq_128(
-; CHECK-NEXT:    ret <2 x i64> <i64 9223372030412324865, i64 4294967295>
-;
-  %1 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 2147483647, i32 1, i32 1, i32 3>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @fold_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @fold_pmuludq_256(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> zeroinitializer, <8 x i32> zeroinitializer)
-  ret <4 x i64> %1
-}
-
-define <8 x i64> @fold_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @fold_pmuludq_512(
-; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 0, i64 255, i64 131070, i64 0, i64 -281474976645121, i64 140737488289792, i64 281470681743360>
-;
-  %1 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> <i32 0, i32 0, i32 undef, i32 0, i32 1, i32 1, i32 2, i32 2, i32 undef, i32 undef, i32 -1, i32 -1, i32 65536, i32 -1, i32 -65536, i32 undef>, <16 x i32> <i32 undef, i32 undef, i32 undef, i32 1, i32 255, i32 -256, i32 65535, i32 -65536, i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
-  ret <8 x i64> %1
-}
-
-define <2 x i64> @fold_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @fold_pmuldq_128(
-; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 2>
-;
-  %1 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> <i32 undef, i32 -1, i32 -1, i32 -1>, <4 x i32> <i32 undef, i32 1, i32 -2, i32 3>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @fold_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @fold_pmuldq_256(
-; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 4294836225, i64 140737488289792, i64 -140737488355328>
-;
-  %1 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> <i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>, <8 x i32> <i32 0, i32 -1, i32 -65535, i32 -65535, i32 2147483647, i32 2147483648, i32 65536, i32 -65535>)
-  ret <4 x i64> %1
-}
-
-define <8 x i64> @fold_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @fold_pmuldq_512(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> zeroinitializer, <16 x i32> <i32 undef, i32 -1, i32 -3, i32 -1, i32 8, i32 10, i32 -256, i32 65536, i32 undef, i32 1, i32 -65535, i32 128, i32 65536, i32 2147483647, i32 -2147483648, i32 65536>)
-  ret <8 x i64> %1
-}
-
-;
-; PMULUDQ/PMULDQ - only the even elements (0, 2, 4, 6) of the vXi32 inputs are required.
-;
-
-define <2 x i64> @test_demanded_elts_pmuludq_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @test_demanded_elts_pmuludq_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %a0, <4 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <2 x i64> [[TMP2]], <2 x i64> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
-;
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-  %3 = call <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32> %1, <4 x i32> %2)
-  %4 = shufflevector <2 x i64> %3, <2 x i64> undef, <2 x i32> zeroinitializer
-  ret <2 x i64> %4
-}
-
-define <4 x i64> @test_demanded_elts_pmuludq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @test_demanded_elts_pmuludq_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %a0, <8 x i32> [[TMP1]])
-; CHECK-NEXT:    ret <4 x i64> [[TMP2]]
-;
-  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
-  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
-  %3 = call <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32> %1, <8 x i32> %2)
-  ret <4 x i64> %3
-}
-
-define <8 x i64> @test_demanded_elts_pmuludq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @test_demanded_elts_pmuludq_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 11, i32 undef, i32 13, i32 undef, i32 15, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a0, <16 x i32> [[TMP1]])
-; CHECK-NEXT:    ret <8 x i64> [[TMP2]]
-;
-  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
-  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
-  %3 = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %1, <16 x i32> %2)
-  ret <8 x i64> %3
-}
-
-define <2 x i64> @test_demanded_elts_pmuldq_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @test_demanded_elts_pmuldq_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 3, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %a0, <4 x i32> [[TMP1]])
-; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
-;
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
-  %3 = call <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32> %1, <4 x i32> %2)
-  ret <2 x i64> %3
-}
-
-define <4 x i64> @test_demanded_elts_pmuldq_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @test_demanded_elts_pmuldq_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %a0, <8 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i64> [[TMP2]], <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
-; CHECK-NEXT:    ret <4 x i64> [[TMP3]]
-;
-  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
-  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
-  %3 = call <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32> %1, <8 x i32> %2)
-  %4 = shufflevector <4 x i64> %3, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 3, i32 3>
-  ret <4 x i64> %4
-}
-
-define <8 x i64> @test_demanded_elts_pmuldq_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @test_demanded_elts_pmuldq_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef, i32 9, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 15, i32 undef>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a0, <16 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x i64> [[TMP2]], <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
-; CHECK-NEXT:    ret <8 x i64> [[TMP3]]
-;
-  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
-  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
-  %3 = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %1, <16 x i32> %2)
-  %4 = shufflevector <8 x i64> %3, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
-  ret <8 x i64> %4
-}
-
-declare <2 x i64> @llvm.x86.sse2.pmulu.dq(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse41.pmuldq(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <4 x i64> @llvm.x86.avx2.pmulu.dq(<8 x i32>, <8 x i32>) nounwind readnone
-declare <4 x i64> @llvm.x86.avx2.pmul.dq(<8 x i32>, <8 x i32>) nounwind readnone
-
-declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>) nounwind readnone
-declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>) nounwind readnone

Removed: llvm/trunk/test/Transforms/InstCombine/x86-pack.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-pack.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-pack.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-pack.ll (removed)
@@ -1,366 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-;
-; UNDEF Elts
-;
-
-define <8 x i16> @undef_packssdw_128() {
-; CHECK-LABEL: @undef_packssdw_128(
-; CHECK-NEXT:    ret <8 x i16> undef
-;
-  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> undef, <4 x i32> undef)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @undef_packusdw_128() {
-; CHECK-LABEL: @undef_packusdw_128(
-; CHECK-NEXT:    ret <8 x i16> undef
-;
-  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> undef)
-  ret <8 x i16> %1
-}
-
-define <16 x i8> @undef_packsswb_128() {
-; CHECK-LABEL: @undef_packsswb_128(
-; CHECK-NEXT:    ret <16 x i8> undef
-;
-  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> undef, <8 x i16> undef)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @undef_packuswb_128() {
-; CHECK-LABEL: @undef_packuswb_128(
-; CHECK-NEXT:    ret <16 x i8> undef
-;
-  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> undef, <8 x i16> undef)
-  ret <16 x i8> %1
-}
-
-define <16 x i16> @undef_packssdw_256() {
-; CHECK-LABEL: @undef_packssdw_256(
-; CHECK-NEXT:    ret <16 x i16> undef
-;
-  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> undef)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @undef_packusdw_256() {
-; CHECK-LABEL: @undef_packusdw_256(
-; CHECK-NEXT:    ret <16 x i16> undef
-;
-  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> undef)
-  ret <16 x i16> %1
-}
-
-define <32 x i8> @undef_packsswb_256() {
-; CHECK-LABEL: @undef_packsswb_256(
-; CHECK-NEXT:    ret <32 x i8> undef
-;
-  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> undef)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @undef_packuswb_256() {
-; CHECK-LABEL: @undef_packuswb_256(
-; CHECK-NEXT:    ret <32 x i8> undef
-;
-  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> undef, <16 x i16> undef)
-  ret <32 x i8> %1
-}
-
-define <32 x i16> @undef_packssdw_512() {
-; CHECK-LABEL: @undef_packssdw_512(
-; CHECK-NEXT:    ret <32 x i16> undef
-;
-  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> undef, <16 x i32> undef)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @undef_packusdw_512() {
-; CHECK-LABEL: @undef_packusdw_512(
-; CHECK-NEXT:    ret <32 x i16> undef
-;
-  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> undef)
-  ret <32 x i16> %1
-}
-
-define <64 x i8> @undef_packsswb_512() {
-; CHECK-LABEL: @undef_packsswb_512(
-; CHECK-NEXT:    ret <64 x i8> undef
-;
-  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> undef)
-  ret <64 x i8> %1
-}
-
-define <64 x i8> @undef_packuswb_512() {
-; CHECK-LABEL: @undef_packuswb_512(
-; CHECK-NEXT:    ret <64 x i8> undef
-;
-  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> undef, <32 x i16> undef)
-  ret <64 x i8> %1
-}
-
-;
-; Constant Folding
-;
-
-define <8 x i16> @fold_packssdw_128() {
-; CHECK-LABEL: @fold_packssdw_128(
-; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 -1, i16 32767, i16 -32768, i16 0, i16 0, i16 0, i16 0>
-;
-  %1 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> <i32 0, i32 -1, i32 65536, i32 -131072>, <4 x i32> zeroinitializer)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @fold_packusdw_128() {
-; CHECK-LABEL: @fold_packusdw_128(
-; CHECK-NEXT:    ret <8 x i16> <i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 0, i16 -32768, i16 -1>
-;
-  %1 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> undef, <4 x i32> <i32 0, i32 -1, i32 32768, i32 65537>)
-  ret <8 x i16> %1
-}
-
-define <16 x i8> @fold_packsswb_128() {
-; CHECK-LABEL: @fold_packsswb_128(
-; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>
-;
-  %1 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> zeroinitializer, <8 x i16> undef)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @fold_packuswb_128() {
-; CHECK-LABEL: @fold_packuswb_128(
-; CHECK-NEXT:    ret <16 x i8> <i8 0, i8 1, i8 0, i8 -1, i8 0, i8 0, i8 0, i8 15, i8 0, i8 127, i8 0, i8 1, i8 0, i8 1, i8 0, i8 0>
-;
-  %1 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> <i16 0, i16 1, i16 -1, i16 255, i16 65535, i16 -32768, i16 -127, i16 15>, <8 x i16> <i16 -15, i16 127, i16 32768, i16 -65535, i16 -255, i16 1, i16 -1, i16 0>)
-  ret <16 x i8> %1
-}
-
-define <16 x i16> @fold_packssdw_256() {
-; CHECK-LABEL: @fold_packssdw_256(
-; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 256, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
-;
-  %1 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <8 x i32> undef)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @fold_packusdw_256() {
-; CHECK-LABEL: @fold_packusdw_256(
-; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 256, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
-;
-  %1 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> <i32 0, i32 -256, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <8 x i32> <i32 0, i32 256, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
-  ret <16 x i16> %1
-}
-
-define <32 x i8> @fold_packsswb_256() {
-; CHECK-LABEL: @fold_packsswb_256(
-; CHECK-NEXT:    ret <32 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
-;
-  %1 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> undef, <16 x i16> zeroinitializer)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @fold_packuswb_256() {
-; CHECK-LABEL: @fold_packuswb_256(
-; CHECK-NEXT:    ret <32 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
-;
-  %1 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> zeroinitializer, <16 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 256, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
-  ret <32 x i8> %1
-}
-
-define <32 x i16> @fold_packssdw_512() {
-; CHECK-LABEL: @fold_packssdw_512(
-; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef, i16 0, i16 512, i16 32767, i16 -32768, i16 undef, i16 undef, i16 undef, i16 undef, i16 -127, i16 -32768, i16 -32767, i16 32767, i16 undef, i16 undef, i16 undef, i16 undef>
-;
-  %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>, <16 x i32> undef)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @fold_packusdw_512() {
-; CHECK-LABEL: @fold_packusdw_512(
-; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767, i16 0, i16 0, i16 0, i16 -1, i16 0, i16 512, i16 -1, i16 0, i16 127, i16 -32768, i16 32767, i16 0, i16 0, i16 0, i16 0, i16 32767>
-;
-  %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> <i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767, i32 0, i32 -512, i32 -65535, i32 65536, i32 127, i32 32768, i32 32767, i32 -32767>, <16 x i32> <i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767, i32 0, i32 512, i32 65535, i32 -65536, i32 -127, i32 -32768, i32 -32767, i32 32767>)
-  ret <32 x i16> %1
-}
-
-define <64 x i8> @fold_packsswb_512() {
-; CHECK-LABEL: @fold_packsswb_512(
-; CHECK-NEXT:    ret <64 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
-;
-  %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> undef, <32 x i16> zeroinitializer)
-  ret <64 x i8> %1
-}
-
-define <64 x i8> @fold_packuswb_512() {
-; CHECK-LABEL: @fold_packuswb_512(
-; CHECK-NEXT:    ret <64 x i8> <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 -1, i8 -1, i8 -1, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 1, i8 2, i8 4, i8 8, i8 16, i8 32, i8 64>
-;
-  %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> zeroinitializer, <32 x i16> <i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64, i16 0, i16 -127, i16 -128, i16 -32768, i16 65536, i16 255, i16 512, i16 512, i16 -1, i16 1, i16 2, i16 4, i16 8, i16 16, i16 32, i16 64>)
-  ret <64 x i8> %1
-}
-
-;
-; Demanded Elts
-;
-
-define <8 x i16> @elts_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @elts_packssdw_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> [[A0:%.*]], <4 x i32> undef)
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-;
-  %1 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 1, i32 undef, i32 undef>
-  %2 = shufflevector <4 x i32> %a1, <4 x i32> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
-  %3 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %1, <4 x i32> %2)
-  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 7, i32 7, i32 7, i32 7>
-  ret <8 x i16> %4
-}
-
-define <8 x i16> @elts_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) {
-; CHECK-LABEL: @elts_packusdw_128(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> [[A0:%.*]], <4 x i32> [[A1:%.*]])
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = insertelement <4 x i32> %a0, i32 0, i32 0
-  %2 = insertelement <4 x i32> %a1, i32 0, i32 3
-  %3 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %1, <4 x i32> %2)
-  %4 = shufflevector <8 x i16> %3, <8 x i16> undef, <8 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 undef>
-  ret <8 x i16> %4
-}
-
-define <16 x i8> @elts_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: @elts_packsswb_128(
-; CHECK-NEXT:    ret <16 x i8> zeroinitializer
-;
-  %1 = insertelement <8 x i16> %a0, i16 0, i32 0
-  %2 = insertelement <8 x i16> %a1, i16 0, i32 0
-  %3 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %1, <8 x i16> %2)
-  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>
-  ret <16 x i8> %4
-}
-
-define <16 x i8> @elts_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) {
-; CHECK-LABEL: @elts_packuswb_128(
-; CHECK-NEXT:    ret <16 x i8> undef
-;
-  %1 = insertelement <8 x i16> undef, i16 0, i32 0
-  %2 = insertelement <8 x i16> undef, i16 0, i32 0
-  %3 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %1, <8 x i16> %2)
-  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-  ret <16 x i8> %4
-}
-
-define <16 x i16> @elts_packssdw_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @elts_packssdw_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> [[A0:%.*]], <8 x i32> undef)
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef>
-  %3 = call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> %1, <8 x i32> %2)
-  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15>
-  ret <16 x i16> %4
-}
-
-define <16 x i16> @elts_packusdw_256(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @elts_packusdw_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x i32> [[A1:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> undef, <8 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <16 x i16> [[TMP2]], <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    ret <16 x i16> [[TMP3]]
-;
-  %1 = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %2 = shufflevector <8 x i32> %a1, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
-  %3 = call <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32> %1, <8 x i32> %2)
-  %4 = shufflevector <16 x i16> %3, <16 x i16> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <16 x i16> %4
-}
-
-define <32 x i8> @elts_packsswb_256(<16 x i16> %a0, <16 x i16> %a1) {
-; CHECK-LABEL: @elts_packsswb_256(
-; CHECK-NEXT:    ret <32 x i8> zeroinitializer
-;
-  %1 = insertelement <16 x i16> %a0, i16 0, i32 0
-  %2 = insertelement <16 x i16> %a1, i16 0, i32 8
-  %3 = call <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16> %1, <16 x i16> %2)
-  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24>
-  ret <32 x i8> %4
-}
-
-define <32 x i8> @elts_packuswb_256(<16 x i16> %a0, <16 x i16> %a1) {
-; CHECK-LABEL: @elts_packuswb_256(
-; CHECK-NEXT:    ret <32 x i8> undef
-;
-  %1 = insertelement <16 x i16> undef, i16 0, i32 1
-  %2 = insertelement <16 x i16> undef, i16 0, i32 0
-  %3 = call <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16> %1, <16 x i16> %2)
-  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> zeroinitializer
-  ret <32 x i8> %4
-}
-
-define <32 x i16> @elts_packssdw_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @elts_packssdw_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> [[A0:%.*]], <16 x i32> undef)
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 8, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 undef, i32 6, i32 5, i32 undef, i32 undef, i32 10, i32 9, i32 undef, i32 undef, i32 14, i32 13, i32 undef>
-  %3 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %1, <16 x i32> %2)
-  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 undef, i32 undef, i32 2, i32 3, i32 4, i32 undef, i32 undef, i32 7, i32 8, i32 undef, i32 undef, i32 11, i32 12, i32 undef, i32 undef, i32 15, i32 undef, i32 undef, i32 18, i32 19, i32 20, i32 undef, i32 undef, i32 23, i32 24, i32 undef, i32 undef, i32 27, i32 28, i32 undef, i32 undef, i32 31>
-  ret <32 x i16> %4
-}
-
-define <32 x i16> @elts_packusdw_512(<16 x i32> %a0, <16 x i32> %a1) {
-; CHECK-LABEL: @elts_packusdw_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i32> [[A1:%.*]], <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-; CHECK-NEXT:    [[TMP2:%.*]] = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> undef, <16 x i32> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <32 x i16> [[TMP2]], <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    ret <32 x i16> [[TMP3]]
-;
-  %1 = shufflevector <16 x i32> %a0, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  %2 = shufflevector <16 x i32> %a1, <16 x i32> undef, <16 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8>
-  %3 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %1, <16 x i32> %2)
-  %4 = shufflevector <32 x i16> %3, <32 x i16> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 28, i32 29, i32 30, i32 31, i32 undef, i32 undef, i32 undef, i32 undef>
-  ret <32 x i16> %4
-}
-
-define <64 x i8> @elts_packsswb_512(<32 x i16> %a0, <32 x i16> %a1) {
-; CHECK-LABEL: @elts_packsswb_512(
-; CHECK-NEXT:    ret <64 x i8> zeroinitializer
-;
-  %1 = insertelement <32 x i16> %a0, i16 0, i32 0
-  %2 = insertelement <32 x i16> %a1, i16 0, i32 8
-  %3 = insertelement <32 x i16> %1, i16 0, i32 16
-  %4 = insertelement <32 x i16> %2, i16 0, i32 24
-  %5 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %3, <32 x i16> %4)
-  %6 = shufflevector <64 x i8> %5, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 24, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56, i32 56>
-  ret <64 x i8> %6
-}
-
-define <64 x i8> @elts_packuswb_512(<32 x i16> %a0, <32 x i16> %a1) {
-; CHECK-LABEL: @elts_packuswb_512(
-; CHECK-NEXT:    ret <64 x i8> undef
-;
-  %1 = insertelement <32 x i16> undef, i16 0, i32 1
-  %2 = insertelement <32 x i16> undef, i16 0, i32 0
-  %3 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %1, <32 x i16> %2)
-  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
-  ret <64 x i8> %4
-}
-
-declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone
-declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone
-declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone
-
-declare <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32>, <8 x i32>) nounwind readnone
-declare <16 x i16> @llvm.x86.avx2.packusdw(<8 x i32>, <8 x i32>) nounwind readnone
-declare <32 x i8> @llvm.x86.avx2.packsswb(<16 x i16>, <16 x i16>) nounwind readnone
-declare <32 x i8> @llvm.x86.avx2.packuswb(<16 x i16>, <16 x i16>) nounwind readnone
-
-declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>) nounwind readnone
-declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>) nounwind readnone
-declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>) nounwind readnone
-declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>) nounwind readnone

Removed: llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-pshufb.ll (removed)
@@ -1,515 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; Verify that instcombine is able to fold identity shuffles.
-
-define <16 x i8> @identity_test(<16 x i8> %InVec) {
-; CHECK-LABEL: @identity_test(
-; CHECK-NEXT:    ret <16 x i8> %InVec
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @identity_test_avx2(
-; CHECK-NEXT:    ret <32 x i8> %InVec
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @identity_test_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @identity_test_avx512(
-; CHECK-NEXT:    ret <64 x i8> %InVec
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
-  ret <64 x i8> %1
-}
-
-; Verify that instcombine is able to fold byte shuffles with zero masks.
-
-define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector(
-; CHECK-NEXT:    ret <16 x i8> zeroinitializer
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector_avx2(
-; CHECK-NEXT:    ret <32 x i8> zeroinitializer
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @fold_to_zero_vector_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector_avx512(
-; CHECK-NEXT:    ret <64 x i8> zeroinitializer
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <64 x i8> %1
-}
-
-; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
-; with a shuffle mask of all zeroes.
-
-define <16 x i8> @splat_test(<16 x i8> %InVec) {
-; CHECK-LABEL: @splat_test(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer)
-  ret <16 x i8> %1
-}
-
-; In the test case below, elements in the low 128-bit lane of the result
-; vector are equal to the lower byte of %InVec (shuffle index 0).
-; Elements in the high 128-bit lane of the result vector are equal to
-; the lower byte in the high 128-bit lane of %InVec (shuffle index 16).
-
-define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @splat_test_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @splat_test_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @splat_test_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> zeroinitializer)
-  ret <64 x i8> %1
-}
-
-; Each of the byte shuffles in the following tests is equivalent to a blend between
-; vector %InVec and a vector of all zeroes.
-
-define <16 x i8> @blend1(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend1(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @blend2(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @blend3(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend3(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @blend4(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend4(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @blend5(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend5(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @blend6(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend6(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @blend1_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend1_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @blend2_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend2_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @blend3_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend3_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @blend4_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend4_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @blend5_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend5_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend6_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @blend1_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @blend1_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 1, i32 64, i32 3, i32 64, i32 5, i32 64, i32 7, i32 64, i32 9, i32 64, i32 11, i32 64, i32 13, i32 64, i32 15, i32 80, i32 17, i32 80, i32 19, i32 80, i32 21, i32 80, i32 23, i32 80, i32 25, i32 80, i32 27, i32 80, i32 29, i32 80, i32 31, i32 96, i32 33, i32 96, i32 35, i3
 2 96, i32 37, i32 96, i32 39, i32 96, i32 41, i32 96, i32 43, i32 96, i32 45, i32 96, i32 47, i32 112, i32 49, i32 112, i32 51, i32 112, i32 53, i32 112, i32 55, i32 112, i32 57, i32 112, i32 59, i32 112, i32 61, i32 112, i32 63>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
-  ret <64 x i8> %1
-}
-
-define <64 x i8> @blend2_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @blend2_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 2, i32 3, i32 64, i32 64, i32 6, i32 7, i32 64, i32 64, i32 10, i32 11, i32 64, i32 64, i32 14, i32 15, i32 80, i32 80, i32 18, i32 19, i32 80, i32 80, i32 22, i32 23, i32 80, i32 80, i32 26, i32 27, i32 80, i32 80, i32 30, i32 31, i32 96, i32 96, i32 34, i32 35, i
 32 96, i32 96, i32 38, i32 39, i32 96, i32 96, i32 42, i32 43, i32 96, i32 96, i32 46, i32 47, i32 112, i32 112, i32 50, i32 51, i32 112, i32 112, i32 54, i32 55, i32 112, i32 112, i32 58, i32 59, i32 112, i32 112, i32 62, i32 63>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
-  ret <64 x i8> %1
-}
-
-define <64 x i8> @blend3_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @blend3_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 64, i32 64, i32 4, i32 5, i32 6, i32 7, i32 64, i32 64, i32 64, i32 64, i32 12, i32 13, i32 14, i32 15, i32 80, i32 80, i32 80, i32 80, i32 20, i32 21, i32 22, i32 23, i32 80, i32 80, i32 80, i32 80, i32 28, i32 29, i32 30, i32 31, i32 96, i32 96, i32 96, i32 96, i
 32 36, i32 37, i32 38, i32 39, i32 96, i32 96, i32 96, i32 96, i32 44, i32 45, i32 46, i32 47, i32 112, i32 112, i32 112, i32 112, i32 52, i32 53, i32 54, i32 55, i32 112, i32 112, i32 112, i32 112, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
-  ret <64 x i8> %1
-}
-
-define <64 x i8> @blend4_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @blend4_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31, i32 96, i32 96, i32 96, i32 96,
  i32 96, i32 96, i32 96, i32 96, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
-  ret <64 x i8> %1
-}
-
-define <64 x i8> @blend5_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @blend5_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 16, i32 17, i32 18, i32 19, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 32, i32 33, i32 34, i32 35, i
 32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 48, i32 49, i32 50, i32 51, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <64 x i8> %1
-}
-
-define <64 x i8> @blend6_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @blend6_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 16, i32 17, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 32, i32 33, i32 96, i32 96,
  i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 48, i32 49, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128,i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <64 x i8> %1
-}
-
-; movq idiom.
-define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
-; CHECK-LABEL: @movq_idiom(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @movq_idiom_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @movq_idiom_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @movq_idiom_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 64, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 80, i32 96, i32 96, i32 96, i32 96, i32 9
 6, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 96, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112, i32 112>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
-  ret <64 x i8> %1
-}
-
-; Vector permutations using byte shuffles.
-
-define <16 x i8> @permute1(<16 x i8> %InVec) {
-; CHECK-LABEL: @permute1(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @permute2(<16 x i8> %InVec) {
-; CHECK-LABEL: @permute2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @permute1_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @permute1_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
-  ret <32 x i8> %1
-}
-
-define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @permute2_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @permute1_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @permute1_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31, i32 36, i32 37, i32 38, i32 39, i32 36, i32 37, i32 38, i32 39, i32 44, i32 45, i32 46, i32 47, i32 44, i32 45, i32 46, i32 47, i32 52, i32 53, i32 54, i32 55, i32 52, i32 53, i32 54, i32 55, i32 60, i32 61, i32 62, i32 63, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
-  ret <64 x i8> %1
-}
-
-define <64 x i8> @permute2_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @permute2_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
-  ret <64 x i8> %1
-}
-
-; Test that instcombine correctly folds a pshufb with values that
-; are not -128 and that are not encoded in four bits.
-
-define <16 x i8> @identity_test2_2(<16 x i8> %InVec) {
-; CHECK-LABEL: @identity_test2_2(
-; CHECK-NEXT:    ret <16 x i8> %InVec
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
-; CHECK-LABEL: @identity_test_avx2_2(
-; CHECK-NEXT:    ret <32 x i8> %InVec
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 16, i8 33, i8 66, i8 19, i8 36, i8 69, i8 22, i8 39, i8 72, i8 25, i8 42, i8 75, i8 28, i8 45, i8 78, i8 31, i8 48, i8 81, i8 34, i8 51, i8 84, i8 37, i8 54, i8 87, i8 40, i8 57, i8 90, i8 43, i8 60, i8 93, i8 46, i8 63>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @identity_test_avx512_2(<64 x i8> %InVec) {
-; CHECK-LABEL: @identity_test_avx512_2(
-; CHECK-NEXT:    ret <64 x i8> %InVec
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 16, i8 33, i8 66, i8 19, i8 36, i8 69, i8 22, i8 39, i8 72, i8 25, i8 42, i8 75, i8 28, i8 45, i8 78, i8 31, i8 48, i8 81, i8 34, i8 51, i8 84, i8 37, i8 54, i8 87, i8 40, i8 57, i8 90, i8 43, i8 60, i8 93, i8 46, i8 63, i8 96, i8 49, i8 66, i8 99, i8 52, i8 69, i8 102, i8 55, i8 72, i8 105, i8 58, i8 75, i8 108, i8 61, i8 78, i8 111, i8 64, i8 81, i8 114, i8 67, i8 84, i8 117, i8 70, i8 87, i8 120, i8 73, i8 90, i8 123, i8 76, i8 93, i8 126, i8 79>)
-  ret <64 x i8> %1
-}
-
-define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector_2(
-; CHECK-NEXT:    ret <16 x i8> zeroinitializer
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -125, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector_avx2_2(
-; CHECK-NEXT:    ret <32 x i8> zeroinitializer
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -127, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -126, i8 -2, i8 -52, i8 -31, i8 -5, i8 -8, i8 -34, i8 -67, i8 -100, i8 -119, i8 -101, i8 -23, i8 -16, i8 -2, i8 -12, i8 -16>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @fold_to_zero_vector_avx512_2(<64 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector_avx512_2(
-; CHECK-NEXT:    ret <64 x i8> zeroinitializer
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 -127, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -126, i8 -2, i8 -52, i8 -31, i8 -5, i8 -8, i8 -34, i8 -67, i8 -100, i8 -119, i8 -101, i8 -23, i8 -16, i8 -2, i8 -12, i8 -16, i8 -125, i8 -3, i8 -51, i8 -30, i8 -6, i8 -9, i8 -35, i8 -68, i8 -101, i8 -118, i8 -102, i8 -24, i8 -15, i8 -3, i8 -13, i8 -17, i8 -124, i8 -4, i8 -56, i8 -29, i8 -7, i8 -10, i8 -36, i8 -69, i8 -102, i8 -117, i8 -103, i8 -25, i8 -14, i8 -4, i8 -14, i8 -18>)
-  ret <64 x i8> %1
-}
-
-define <16 x i8> @permute3(<16 x i8> %InVec) {
-; CHECK-LABEL: @permute3(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @permute3_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 92, i8 93, i8 94, i8 95, i8 108, i8 109, i8 110, i8 111>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @permute3_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @permute3_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> undef, <64 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31, i32 36, i32 37, i32 38, i32 39, i32 36, i32 37, i32 38, i32 39, i32 44, i32 45, i32 46, i32 47, i32 44, i32 45, i32 46, i32 47, i32 52, i32 53, i32 54, i32 55, i32 52, i32 53, i32 54, i32 55, i32 60, i32 61, i32 62, i32 63, i32 60, i32 61, i32 62, i32 63>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 92, i8 93, i8 94, i8 95, i8 108, i8 109, i8 110, i8 111, i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 108, i8 109, i8 110, i8 111, i8 124, i8 125, i8 126, i8 127>)
-  ret <64 x i8> %1
-}
-
-; FIXME: Verify that instcombine is able to fold constant byte shuffles with undef mask elements.
-
-define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
-; CHECK-LABEL: @fold_with_undef_elts(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 16, i32 undef, i32 16, i32 1, i32 16, i32 undef, i32 16, i32 2, i32 16, i32 undef, i32 16, i32 3, i32 16, i32 undef, i32 16>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @fold_with_undef_elts_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 32, i32 undef, i32 32, i32 1, i32 32, i32 undef, i32 32, i32 2, i32 32, i32 undef, i32 32, i32 3, i32 32, i32 undef, i32 32, i32 16, i32 48, i32 undef, i32 48, i32 17, i32 48, i32 undef, i32 48, i32 18, i32 48, i32 undef, i32 48, i32 19, i32 48, i32 undef, i32 48>
-; CHECK-NEXT:    ret <32 x i8> [[TMP1]]
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @fold_with_undef_elts_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @fold_with_undef_elts_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <64 x i8> %InVec, <64 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <64 x i32> <i32 0, i32 64, i32 undef, i32 64, i32 1, i32 64, i32 undef, i32 64, i32 2, i32 64, i32 undef, i32 64, i32 3, i32 64, i32 undef, i32 64, i32 16, i32 80, i32 undef, i32 80, i32 17, i32 80, i32 undef, i32 80, i32 18, i32 80, i32 undef, i32 80, i32 19, i32 80, i32 undef, i32 80, i32 32, i
 32 96, i32 undef, i32 96, i32 33, i32 96, i32 undef, i32 96, i32 34, i32 96, i32 undef, i32 96, i32 35, i32 96, i32 undef, i32 96, i32 48, i32 112, i32 undef, i32 112, i32 49, i32 112, i32 undef, i32 112, i32 50, i32 112, i32 undef, i32 112, i32 51, i32 112, i32 undef, i32 112>
-; CHECK-NEXT:    ret <64 x i8> [[TMP1]]
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
-  ret <64 x i8> %1
-}
-
-define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
-; CHECK-LABEL: @fold_with_allundef_elts(
-; CHECK-NEXT:    ret <16 x i8> undef
-;
-  %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
-  ret <16 x i8> %1
-}
-
-define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @fold_with_allundef_elts_avx2(
-; CHECK-NEXT:    ret <32 x i8> undef
-;
-  %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
-  ret <32 x i8> %1
-}
-
-define <64 x i8> @fold_with_allundef_elts_avx512(<64 x i8> %InVec) {
-; CHECK-LABEL: @fold_with_allundef_elts_avx512(
-; CHECK-NEXT:    ret <64 x i8> undef
-;
-  %1 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> undef)
-  ret <64 x i8> %1
-}
-
-; Demanded elts tests.
-
-define <16 x i8> @demanded_elts_insertion(<16 x i8> %InVec, <16 x i8> %BaseMask, i8 %M0, i8 %M15) {
-; CHECK-LABEL: @demanded_elts_insertion(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %BaseMask)
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
-; CHECK-NEXT:    ret <16 x i8> [[TMP2]]
-;
-  %1 = insertelement <16 x i8> %BaseMask, i8 %M0, i32 0
-  %2 = insertelement <16 x i8> %1, i8 %M15, i32 15
-  %3 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> %2)
-  %4 = shufflevector <16 x i8> %3, <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 undef>
-  ret <16 x i8> %4
-}
-
-define <32 x i8> @demanded_elts_insertion_avx2(<32 x i8> %InVec, <32 x i8> %BaseMask, i8 %M0, i8 %M22) {
-; CHECK-LABEL: @demanded_elts_insertion_avx2(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> [[TMP1]])
-; CHECK-NEXT:    ret <32 x i8> [[TMP2]]
-;
-  %1 = insertelement <32 x i8> %BaseMask, i8 %M0, i32 0
-  %2 = insertelement <32 x i8> %1, i8 %M22, i32 22
-  %3 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> %2)
-  %4 = shufflevector <32 x i8> %3, <32 x i8> undef, <32 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 undef, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-  ret <32 x i8> %4
-}
-
-define <64 x i8> @demanded_elts_insertion_avx512(<64 x i8> %InVec, <64 x i8> %BaseMask, i8 %M0, i8 %M30) {
-; CHECK-LABEL: @demanded_elts_insertion_avx512(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <64 x i8> undef, i8 %M0, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <64 x i8> [[TMP2]], <64 x i8> undef, <64 x i32> zeroinitializer
-; CHECK-NEXT:    ret <64 x i8> [[TMP3]]
-;
-  %1 = insertelement <64 x i8> %BaseMask, i8 %M0, i32 0
-  %2 = insertelement <64 x i8> %1, i8 %M30, i32 30
-  %3 = tail call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %InVec, <64 x i8> %2)
-  %4 = shufflevector <64 x i8> %3, <64 x i8> undef, <64 x i32> zeroinitializer
-  ret <64 x i8> %4
-}
-
-declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
-declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
-declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)

Removed: llvm/trunk/test/Transforms/InstCombine/x86-sse.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-sse.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-sse.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-sse.ll (removed)
@@ -1,613 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define float @test_rcp_ss_0(float %a) {
-; CHECK-LABEL: @test_rcp_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    ret float [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
-  %6 = extractelement <4 x float> %5, i32 0
-  ret float %6
-}
-
-define float @test_rcp_ss_1(float %a) {
-; CHECK-LABEL: @test_rcp_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
-  %6 = extractelement <4 x float> %5, i32 1
-  ret float %6
-}
-
-define float @test_sqrt_ss_0(float %a) {
-; CHECK-LABEL: @test_sqrt_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    ret float [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
-  %6 = extractelement <4 x float> %5, i32 0
-  ret float %6
-}
-
-define float @test_sqrt_ss_2(float %a) {
-; CHECK-LABEL: @test_sqrt_ss_2(
-; CHECK-NEXT:    ret float 2.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
-  %6 = extractelement <4 x float> %5, i32 2
-  ret float %6
-}
-
-define float @test_rsqrt_ss_0(float %a) {
-; CHECK-LABEL: @test_rsqrt_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    ret float [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
-  %6 = extractelement <4 x float> %5, i32 0
-  ret float %6
-}
-
-define float @test_rsqrt_ss_3(float %a) {
-; CHECK-LABEL: @test_rsqrt_ss_3(
-; CHECK-NEXT:    ret float 3.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
-  %6 = extractelement <4 x float> %5, i32 3
-  ret float %6
-}
-
-define float @test_add_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_add_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd float %a, %b
-; CHECK-NEXT:    ret float [[TMP1]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
-  %r = extractelement <4 x float> %9, i32 0
-  ret float %r
-}
-
-define float @test_add_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_add_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
-  %7 = extractelement <4 x float> %6, i32 1
-  ret float %7
-}
-
-define float @test_sub_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_sub_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub float %a, %b
-; CHECK-NEXT:    ret float [[TMP1]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
-  %r = extractelement <4 x float> %9, i32 0
-  ret float %r
-}
-
-define float @test_sub_ss_2(float %a, float %b) {
-; CHECK-LABEL: @test_sub_ss_2(
-; CHECK-NEXT:    ret float 2.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
-  %7 = extractelement <4 x float> %6, i32 2
-  ret float %7
-}
-
-define float @test_mul_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_mul_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul float %a, %b
-; CHECK-NEXT:    ret float [[TMP1]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
-  %r = extractelement <4 x float> %9, i32 0
-  ret float %r
-}
-
-define float @test_mul_ss_3(float %a, float %b) {
-; CHECK-LABEL: @test_mul_ss_3(
-; CHECK-NEXT:    ret float 3.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
-  %7 = extractelement <4 x float> %6, i32 3
-  ret float %7
-}
-
-define float @test_div_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_div_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv float %a, %b
-; CHECK-NEXT:    ret float [[TMP1]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
-  %r = extractelement <4 x float> %9, i32 0
-  ret float %r
-}
-
-define float @test_div_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_div_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
-  %7 = extractelement <4 x float> %6, i32 1
-  ret float %7
-}
-
-define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_min_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
-  ret <4 x float> %4
-}
-
-define float @test_min_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_min_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
-; CHECK-NEXT:    ret float [[TMP4]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
-  %10 = extractelement <4 x float> %9, i32 0
-  ret float %10
-}
-
-define float @test_min_ss_2(float %a, float %b) {
-; CHECK-LABEL: @test_min_ss_2(
-; CHECK-NEXT:    ret float 2.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
-  %7 = extractelement <4 x float> %6, i32 2
-  ret float %7
-}
-
-define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_max_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
-  ret <4 x float> %4
-}
-
-define float @test_max_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_max_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
-; CHECK-NEXT:    ret float [[TMP4]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
-  %10 = extractelement <4 x float> %9, i32 0
-  ret float %10
-}
-
-define float @test_max_ss_3(float %a, float %b) {
-; CHECK-LABEL: @test_max_ss_3(
-; CHECK-NEXT:    ret float 3.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
-  %7 = extractelement <4 x float> %6, i32 3
-  ret float %7
-}
-
-define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_cmp_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
-  ret <4 x float> %4
-}
-
-define float @test_cmp_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_cmp_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
-; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
-; CHECK-NEXT:    ret float [[R]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
-  %r = extractelement <4 x float> %9, i32 0
-  ret float %r
-}
-
-define float @test_cmp_ss_1(float %a, float %b) {
-; CHECK-LABEL: @test_cmp_ss_1(
-; CHECK-NEXT:    ret float 1.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
-  %7 = extractelement <4 x float> %6, i32 1
-  ret float %7
-}
-
-define i32 @test_comieq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comieq_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_comige_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comige_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_comigt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comigt_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_comile_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comile_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_comilt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comilt_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_comineq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_comineq_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_ucomieq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomieq_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_ucomige_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomige_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_ucomigt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomigt_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_ucomile_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomile_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_ucomilt_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomilt_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-define i32 @test_ucomineq_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_ucomineq_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
-  ret i32 %9
-}
-
-declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
-declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
-declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
-
-declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
-declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
-
-declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
-
-declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
-declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)

Removed: llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-sse2.ll (removed)
@@ -1,460 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define double @test_sqrt_sd_0(double %a) {
-; CHECK-LABEL: @test_sqrt_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
-; CHECK-NEXT:    ret double [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
-  %4 = extractelement <2 x double> %3, i32 0
-  ret double %4
-}
-
-define double @test_sqrt_sd_1(double %a) {
-; CHECK-LABEL: @test_sqrt_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
-  %4 = extractelement <2 x double> %3, i32 1
-  ret double %4
-}
-
-define double @test_add_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_add_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = fadd double %a, %b
-; CHECK-NEXT:    ret double [[TMP1]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6
-}
-
-define double @test_add_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_add_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-define double @test_sub_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_sub_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = fsub double %a, %b
-; CHECK-NEXT:    ret double [[TMP1]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6
-}
-
-define double @test_sub_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_sub_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-define double @test_mul_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_mul_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = fmul double %a, %b
-; CHECK-NEXT:    ret double [[TMP1]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6
-}
-
-define double @test_mul_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_mul_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-define double @test_div_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_div_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = fdiv double %a, %b
-; CHECK-NEXT:    ret double [[TMP1]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6
-}
-
-define double @test_div_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_div_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_min_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %b)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1)
-  ret <2 x double> %2
-}
-
-define double @test_min_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_min_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
-; CHECK-NEXT:    ret double [[TMP4]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6
-}
-
-define double @test_min_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_min_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_max_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %b)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1)
-  ret <2 x double> %2
-}
-
-define double @test_max_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_max_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
-; CHECK-NEXT:    ret double [[TMP4]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6
-}
-
-define double @test_max_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_max_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_cmp_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %b, i8 0)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0)
-  ret <2 x double> %2
-}
-
-define double @test_cmp_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_cmp_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i8 0)
-; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
-; CHECK-NEXT:    ret double [[TMP4]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
-  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6
-}
-
-define double @test_cmp_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_cmp_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-define i32 @test_comieq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comieq_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_comige_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comige_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_comigt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comigt_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_comile_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comile_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_comilt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comilt_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_comineq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_comineq_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_ucomieq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomieq_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_ucomige_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomige_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_ucomigt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomigt_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_ucomile_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomile_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_ucomilt_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomilt_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-define i32 @test_ucomineq_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_ucomineq_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
-; CHECK-NEXT:    ret i32 [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %2, <2 x double> %4)
-  ret i32 %5
-}
-
-declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
-
-declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>)
-declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>)
-declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>)
-declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>)
-declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>)
-declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>)
-declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8)
-
-declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>)
-
-declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>)
-declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>)

Removed: llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-sse41.ll (removed)
@@ -1,98 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
-; CHECK-LABEL: @test_round_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 10)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
-  %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
-  %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
-  ret <2 x double> %3
-}
-
-define double @test_round_sd_0(double %a, double %b) {
-; CHECK-LABEL: @test_round_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %b, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
-; CHECK-NEXT:    ret double [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
-  %6 = extractelement <2 x double> %5, i32 0
-  ret double %6
-}
-
-define double @test_round_sd_1(double %a, double %b) {
-; CHECK-LABEL: @test_round_sd_1(
-; CHECK-NEXT:    ret double 1.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = insertelement <2 x double> undef, double %b, i32 0
-  %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
-  %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
-  %6 = extractelement <2 x double> %5, i32 1
-  ret double %6
-}
-
-define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
-; CHECK-LABEL: @test_round_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> %b, i32 10)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
-  %5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2
-  %6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3
-  %7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10)
-  ret <4 x float> %7
-}
-
-define float @test_round_ss_0(float %a, float %b) {
-; CHECK-LABEL: @test_round_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %b, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
-; CHECK-NEXT:    [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    ret float [[R]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
-  %r = extractelement <4 x float> %9, i32 0
-  ret float %r
-}
-
-define float @test_round_ss_2(float %a, float %b) {
-; CHECK-LABEL: @test_round_ss_2(
-; CHECK-NEXT:    ret float 2.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = insertelement <4 x float> undef, float %b, i32 0
-  %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
-  %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
-  %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
-  %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
-  %r = extractelement <4 x float> %9, i32 2
-  ret float %r
-}
-
-declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
-declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone

Removed: llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-sse4a.ll (removed)
@@ -1,408 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-;
-; EXTRQ
-;
-
-define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_call(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_zero_arg0(
-; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_zero_arg1(
-; CHECK-NEXT:    ret <2 x i64> %x
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_to_extqi(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_constant(
-; CHECK-NEXT:    ret <2 x i64> <i64 255, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_constant_undef(
-; CHECK-NEXT:    ret <2 x i64> <i64 65535, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrq_call_constexpr(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrq_call_constexpr(
-; CHECK-NEXT:    ret <2 x i64> %x
-;
-  %1 = call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> bitcast (<2 x i64> <i64 0, i64 undef> to <16 x i8>))
-  ret <2 x i64> %1
-}
-
-;
-; EXTRQI
-;
-
-define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_call(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
-; CHECK-NEXT:    [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[TMP3]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_undef(
-; CHECK-NEXT:    ret <2 x i64> undef
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_zero(
-; CHECK-NEXT:    ret <2 x i64> <i64 0, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_constant(
-; CHECK-NEXT:    ret <2 x i64> <i64 7, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_constant_undef(
-; CHECK-NEXT:    ret <2 x i64> <i64 15, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_extrqi_call_constexpr() {
-; CHECK-LABEL: @test_extrqi_call_constexpr(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 8, i8 16)
-  ret <2 x i64> %1
-}
-
-;
-; INSERTQ
-;
-
-define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_call(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_to_insertqi(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_constant(
-; CHECK-NEXT:    ret <2 x i64> <i64 32, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_constant_undef(
-; CHECK-NEXT:    ret <2 x i64> <i64 33, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_insertq_call_constexpr(<2 x i64> %x) {
-; CHECK-LABEL: @test_insertq_call_constexpr(
-; CHECK-NEXT:    [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 0, i64 undef>, i8 2, i8 0)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>))
-  ret <2 x i64> %1
-}
-
-;
-; INSERTQI
-;
-
-define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
-; CHECK-LABEL: @test_insertqi_shuffle_04uu(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = bitcast <16 x i8> %v to <2 x i64>
-  %2 = bitcast <16 x i8> %i to <2 x i64>
-  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
-  %4 = bitcast <2 x i64> %3 to <16 x i8>
-  ret <16 x i8> %4
-}
-
-define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
-; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT:    ret <16 x i8> [[TMP1]]
-;
-  %1 = bitcast <16 x i8> %v to <2 x i64>
-  %2 = bitcast <16 x i8> %i to <2 x i64>
-  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
-  %4 = bitcast <2 x i64> %3 to <16 x i8>
-  ret <16 x i8> %4
-}
-
-define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @test_insertqi_constant(
-; CHECK-NEXT:    ret <2 x i64> <i64 -131055, i64 undef>
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @test_insertqi_call_constexpr(<2 x i64> %x) {
-; CHECK-LABEL: @test_insertqi_call_constexpr(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 0, i64 undef>, i8 48, i8 3)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> bitcast (<16 x i8> trunc (<16 x i16> bitcast (<4 x i64> <i64 0, i64 undef, i64 2, i64 undef> to <16 x i16>) to <16 x i8>) to <2 x i64>), i8 48, i8 3)
-  ret <2 x i64> %1
-}
-
-; The result of this insert is the second arg, since the top 64 bits of
-; the result are undefined, and we copy the bottom 64 bits from the
-; second arg
-define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsert64Bits(
-; CHECK-NEXT:    ret <2 x i64> %i
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testZeroLength(
-; CHECK-NEXT:    ret <2 x i64> %i
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_1(
-; CHECK-NEXT:    ret <2 x i64> undef
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_2(
-; CHECK-NEXT:    ret <2 x i64> undef
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_3(
-; CHECK-NEXT:    ret <2 x i64> undef
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
-  ret <2 x i64> %1
-}
-
-;
-; Vector Demanded Bits
-;
-
-define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_arg0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_arg1(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_args01(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-  %3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
-  ret <2 x i64> %3
-}
-
-define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_ret(
-; CHECK-NEXT:    ret <2 x i64> undef
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
-  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_arg0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_ret(
-; CHECK-NEXT:    ret <2 x i64> undef
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
-  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_arg0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_ret(
-; CHECK-NEXT:    ret <2 x i64> undef
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
-  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_arg0(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_arg1(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
-  ret <2 x i64> %2
-}
-
-define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_args01(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
-  ret <2 x i64> %3
-}
-
-define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_ret(
-; CHECK-NEXT:    ret <2 x i64> undef
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
-  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
-  ret <2 x i64> %2
-}
-
-; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrq
-declare <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64>, <16 x i8>) nounwind
-
-; CHECK: declare <2 x i64> @llvm.x86.sse4a.extrqi
-declare <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64>, i8, i8) nounwind
-
-; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertq
-declare <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64>, <2 x i64>) nounwind
-
-; CHECK: declare <2 x i64> @llvm.x86.sse4a.insertqi
-declare <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64>, <2 x i64>, i8, i8) nounwind

Removed: llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-vector-shifts.ll (removed)
@@ -1,3434 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-;
-; ASHR - Immediate
-;
-
-define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrai_w_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrai_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrai_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
-  ret <8 x i16> %1
-}
-
-define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrai_d_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrai_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrai_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
-  ret <4 x i32> %1
-}
-
-define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrai_w_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrai_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrai_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
-  ret <16 x i16> %1
-}
-
-define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrai_d_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrai_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrai_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
-  ret <8 x i32> %1
-}
-
-define <2 x i64> @avx512_psrai_q_128_0(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_128_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 0)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @avx512_psrai_q_128_15(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_128_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 15)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @avx512_psrai_q_128_64(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_128_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 63>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64> %v, i32 64)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx512_psrai_q_256_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_256_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 0)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx512_psrai_q_256_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_256_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 15)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx512_psrai_q_256_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_256_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 63, i64 63, i64 63>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64> %v, i32 64)
-  ret <4 x i64> %1
-}
-
-define <32 x i16> @avx512_psrai_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrai_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 0)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrai_w_512_15(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrai_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 15)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrai_w_512_64(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrai_w_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %v, i32 64)
-  ret <32 x i16> %1
-}
-
-define <16 x i32> @avx512_psrai_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrai_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 0)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrai_d_512_15(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrai_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 15)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrai_d_512_64(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrai_d_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %v, i32 64)
-  ret <16 x i32> %1
-}
-
-define <8 x i64> @avx512_psrai_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 0)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrai_q_512_15(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 15)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrai_q_512_64(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrai_q_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64> %v, i32 64)
-  ret <8 x i64> %1
-}
-
-;
-; LSHR - Immediate
-;
-
-define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrli_w_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrli_w_64(
-; CHECK-NEXT:    ret <8 x i16> zeroinitializer
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
-  ret <8 x i16> %1
-}
-
-define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrli_d_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrli_d_64(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
-  ret <4 x i32> %1
-}
-
-define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrli_q_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrli_q_64(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
-  ret <2 x i64> %1
-}
-
-define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrli_w_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrli_w_64(
-; CHECK-NEXT:    ret <16 x i16> zeroinitializer
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
-  ret <16 x i16> %1
-}
-
-define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrli_d_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrli_d_64(
-; CHECK-NEXT:    ret <8 x i32> zeroinitializer
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
-  ret <8 x i32> %1
-}
-
-define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrli_q_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrli_q_64(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
-  ret <4 x i64> %1
-}
-
-define <32 x i16> @avx512_psrli_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrli_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 0)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrli_w_512_15(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrli_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 15)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrli_w_512_64(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrli_w_512_64(
-; CHECK-NEXT:    ret <32 x i16> zeroinitializer
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %v, i32 64)
-  ret <32 x i16> %1
-}
-
-define <16 x i32> @avx512_psrli_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrli_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 0)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrli_d_512_15(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrli_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 15)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrli_d_512_64(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrli_d_512_64(
-; CHECK-NEXT:    ret <16 x i32> zeroinitializer
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32> %v, i32 64)
-  ret <16 x i32> %1
-}
-
-define <8 x i64> @avx512_psrli_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrli_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 0)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrli_q_512_15(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrli_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 15)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrli_q_512_64(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrli_q_512_64(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64> %v, i32 64)
-  ret <8 x i64> %1
-}
-
-;
-; SHL - Immediate
-;
-
-define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_pslli_w_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_pslli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_pslli_w_64(
-; CHECK-NEXT:    ret <8 x i16> zeroinitializer
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
-  ret <8 x i16> %1
-}
-
-define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_pslli_d_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_pslli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_pslli_d_64(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
-  ret <4 x i32> %1
-}
-
-define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_pslli_q_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_pslli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_pslli_q_64(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
-  ret <2 x i64> %1
-}
-
-define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_pslli_w_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_pslli_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_pslli_w_64(
-; CHECK-NEXT:    ret <16 x i16> zeroinitializer
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
-  ret <16 x i16> %1
-}
-
-define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_pslli_d_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_pslli_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_pslli_d_64(
-; CHECK-NEXT:    ret <8 x i32> zeroinitializer
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
-  ret <8 x i32> %1
-}
-
-define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_pslli_q_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_pslli_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_pslli_q_64(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
-  ret <4 x i64> %1
-}
-
-define <32 x i16> @avx512_pslli_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_pslli_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 0)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_pslli_w_512_15(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_pslli_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 15)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_pslli_w_512_64(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_pslli_w_512_64(
-; CHECK-NEXT:    ret <32 x i16> zeroinitializer
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %v, i32 64)
-  ret <32 x i16> %1
-}
-
-define <16 x i32> @avx512_pslli_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_pslli_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 0)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_pslli_d_512_15(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_pslli_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 15)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_pslli_d_512_64(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_pslli_d_512_64(
-; CHECK-NEXT:    ret <16 x i32> zeroinitializer
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32> %v, i32 64)
-  ret <16 x i32> %1
-}
-
-define <8 x i64> @avx512_pslli_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_pslli_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 0)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_pslli_q_512_15(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_pslli_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 15)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_pslli_q_512_64(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_pslli_q_512_64(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64> %v, i32 64)
-  ret <8 x i64> %1
-}
-
-;
-; ASHR - Constant Vector
-;
-
-define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psra_w_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psra_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psra_w_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psra_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <8 x i16> %1
-}
-
-define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psra_d_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psra_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psra_d_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psra_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <4 x i32> %1
-}
-
-define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psra_w_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psra_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psra_w_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psra_w_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <16 x i16> %1
-}
-
-define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psra_d_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psra_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psra_d_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psra_d_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <8 x i32> %1
-}
-
-define <2 x i64> @avx512_psra_q_128_0(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_128_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @avx512_psra_q_128_15(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_128_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @avx512_psra_q_128_64(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_128_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 63>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx512_psra_q_256_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_256_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> zeroinitializer)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx512_psra_q_256_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_256_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx512_psra_q_256_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_256_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 63, i64 63, i64 63>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <4 x i64> %1
-}
-
-define <32 x i16> @avx512_psra_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psra_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psra_w_512_15(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psra_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psra_w_512_15_splat(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psra_w_512_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psra_w_512_64(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psra_w_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <32 x i16> %1
-}
-
-define <16 x i32> @avx512_psra_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psra_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psra_d_512_15(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psra_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psra_d_512_15_splat(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psra_d_512_15_splat(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psra_d_512_64(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psra_d_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <16 x i32> %1
-}
-
-define <8 x i64> @avx512_psra_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psra_q_512_15(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psra_q_512_64(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psra_q_512_64(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63, i64 63>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <8 x i64> %1
-}
-
-;
-; LSHR - Constant Vector
-;
-
-define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrl_w_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrl_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrl_w_15_splat(
-; CHECK-NEXT:    ret <8 x i16> zeroinitializer
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrl_w_64(
-; CHECK-NEXT:    ret <8 x i16> zeroinitializer
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <8 x i16> %1
-}
-
-define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrl_d_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrl_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrl_d_15_splat(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrl_d_64(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <4 x i32> %1
-}
-
-define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrl_q_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrl_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrl_q_64(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <2 x i64> %1
-}
-
-define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrl_w_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrl_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrl_w_15_splat(
-; CHECK-NEXT:    ret <16 x i16> zeroinitializer
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrl_w_64(
-; CHECK-NEXT:    ret <16 x i16> zeroinitializer
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <16 x i16> %1
-}
-
-define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrl_d_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrl_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrl_d_15_splat(
-; CHECK-NEXT:    ret <8 x i32> zeroinitializer
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrl_d_64(
-; CHECK-NEXT:    ret <8 x i32> zeroinitializer
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <8 x i32> %1
-}
-
-define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrl_q_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrl_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrl_q_64(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <4 x i64> %1
-}
-
-define <32 x i16> @avx512_psrl_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrl_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrl_w_512_15(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrl_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrl_w_512_15_splat(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrl_w_512_15_splat(
-; CHECK-NEXT:    ret <32 x i16> zeroinitializer
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrl_w_512_64(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrl_w_512_64(
-; CHECK-NEXT:    ret <32 x i16> zeroinitializer
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <32 x i16> %1
-}
-
-define <16 x i32> @avx512_psrl_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrl_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrl_d_512_15(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrl_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrl_d_512_15_splat(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrl_d_512_15_splat(
-; CHECK-NEXT:    ret <16 x i32> zeroinitializer
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrl_d_512_64(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrl_d_512_64(
-; CHECK-NEXT:    ret <16 x i32> zeroinitializer
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <16 x i32> %1
-}
-
-define <8 x i64> @avx512_psrl_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrl_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrl_q_512_15(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrl_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrl_q_512_64(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrl_q_512_64(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <8 x i64> %1
-}
-
-;
-; SHL - Constant Vector
-;
-
-define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psll_w_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psll_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psll_w_15_splat(
-; CHECK-NEXT:    ret <8 x i16> zeroinitializer
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psll_w_64(
-; CHECK-NEXT:    ret <8 x i16> zeroinitializer
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <8 x i16> %1
-}
-
-define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psll_d_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psll_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psll_d_15_splat(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psll_d_64(
-; CHECK-NEXT:    ret <4 x i32> zeroinitializer
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <4 x i32> %1
-}
-
-define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psll_q_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psll_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psll_q_64(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <2 x i64> %1
-}
-
-define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psll_w_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psll_w_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psll_w_15_splat(
-; CHECK-NEXT:    ret <16 x i16> zeroinitializer
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psll_w_64(
-; CHECK-NEXT:    ret <16 x i16> zeroinitializer
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <16 x i16> %1
-}
-
-define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psll_d_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psll_d_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psll_d_15_splat(
-; CHECK-NEXT:    ret <8 x i32> zeroinitializer
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <8 x i32> %1
-}
-
-define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psll_d_64(
-; CHECK-NEXT:    ret <8 x i32> zeroinitializer
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <8 x i32> %1
-}
-
-define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psll_q_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psll_q_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <4 x i64> %1
-}
-
-define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psll_q_64(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <4 x i64> %1
-}
-
-define <32 x i16> @avx512_psll_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psll_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> zeroinitializer)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psll_w_512_15(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psll_w_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psll_w_15_512_splat(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psll_w_15_512_splat(
-; CHECK-NEXT:    ret <32 x i16> zeroinitializer
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psll_w_512_64(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psll_w_512_64(
-; CHECK-NEXT:    ret <32 x i16> zeroinitializer
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
-  ret <32 x i16> %1
-}
-
-define <16 x i32> @avx512_psll_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psll_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> zeroinitializer)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psll_d_512_15(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psll_d_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psll_d_512_15_splat(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psll_d_512_15_splat(
-; CHECK-NEXT:    ret <16 x i32> zeroinitializer
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psll_d_512_64(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psll_d_512_64(
-; CHECK-NEXT:    ret <16 x i32> zeroinitializer
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
-  ret <16 x i32> %1
-}
-
-define <8 x i64> @avx512_psll_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psll_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> zeroinitializer)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psll_q_512_15(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psll_q_512_15(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 15, i64 9999>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psll_q_512_64(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psll_q_512_64(
-; CHECK-NEXT:    ret <8 x i64> zeroinitializer
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> <i64 64, i64 9999>)
-  ret <8 x i64> %1
-}
-
-;
-; ASHR - Constant Per-Element Vector
-;
-
-define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrav_d_128_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrav_d_256_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
-  ret <8 x i32> %1
-}
-
-define <16 x i32> @avx512_psrav_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrav_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
-  ret <16 x i32> %1
-}
-
-define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrav_d_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrav_d_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
-  ret <8 x i32> %1
-}
-
-define <16 x i32> @avx512_psrav_d_512_var(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrav_d_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
-  ret <16 x i32> %1
-}
-
-define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrav_d_128_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrav_d_256_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
-  ret <8 x i32> %1
-}
-
-define <16 x i32> @avx512_psrav_d_512_allbig(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrav_d_512_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
-  ret <16 x i32> %1
-}
-
-define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrav_d_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
-  %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
-  ret <4 x i32> %2
-}
-
-define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrav_d_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
-  %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
-  ret <8 x i32> %2
-}
-
-define <16 x i32> @avx512_psrav_d_512_undef(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrav_d_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
-  %2 = tail call <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32> %v, <16 x i32> %1)
-  ret <16 x i32> %2
-}
-
-define <2 x i64> @avx512_psrav_q_128_0(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_128_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> zeroinitializer)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx512_psrav_q_256_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_256_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx512_psrav_q_128_var(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 0, i64 8>
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx512_psrav_q_256_var(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx512_psrav_q_128_allbig(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_128_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 63, i64 undef>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> <i64 64, i64 undef>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx512_psrav_q_256_allbig(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_256_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 63, i64 undef, i64 63, i64 63>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx512_psrav_q_128_undef(<2 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <2 x i64> %v, <i64 undef, i64 8>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 0
-  %2 = tail call <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64> %v, <2 x i64> %1)
-  ret <2 x i64> %2
-}
-
-define <4 x i64> @avx512_psrav_q_256_undef(<4 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
-  %2 = tail call <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64> %v, <4 x i64> %1)
-  ret <4 x i64> %2
-}
-
-define <8 x i64> @avx512_psrav_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrav_q_512_var(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrav_q_512_allbig(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_512_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 63, i64 undef, i64 63, i64 63, i64 63, i64 undef, i64 63, i64 63>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrav_q_512_undef(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrav_q_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
-  %2 = tail call <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64> %v, <8 x i64> %1)
-  ret <8 x i64> %2
-}
-
-define <8 x i16> @avx512_psrav_w_128_0(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_128_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psrav_w_128_var(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psrav_w_128_allbig(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_128_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psrav_w_128_undef(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
-  %2 = tail call <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16> %v, <8 x i16> %1)
-  ret <8 x i16> %2
-}
-
-define <16 x i16> @avx512_psrav_w_256_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_256_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psrav_w_256_var(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psrav_w_256_allbig(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_256_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psrav_w_256_undef(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
-  %2 = tail call <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16> %v, <16 x i16> %1)
-  ret <16 x i16> %2
-}
-
-define <32 x i16> @avx512_psrav_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrav_w_512_var(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrav_w_512_allbig(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_512_allbig(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 undef, i16 15, i16 15, i16 undef, i16 15, i16 15>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrav_w_512_undef(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrav_w_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = ashr <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
-  %2 = tail call <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16> %v, <32 x i16> %1)
-  ret <32 x i16> %2
-}
-
-;
-; LSHR - Constant Per-Element Vector
-;
-
-define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_128_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_256_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
-  ret <8 x i32> %1
-}
-
-define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
-  ret <8 x i32> %1
-}
-
-define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_128_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_256_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
-  ret <8 x i32> %1
-}
-
-define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
-; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
-; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
-  ret <8 x i32> %1
-}
-
-define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
-  %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
-  ret <4 x i32> %2
-}
-
-define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
-  %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
-  ret <8 x i32> %2
-}
-
-define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_128_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_256_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 8>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_128_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_256_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
-; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 undef>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
-  %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
-  ret <2 x i64> %2
-}
-
-define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrlv_q_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
-  %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
-  ret <4 x i64> %2
-}
-
-define <16 x i32> @avx2_psrlv_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx2_psrlv_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrlv_d_512_var(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrlv_d_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrlv_d_512_big(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrlv_d_512_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrlv_d_512_allbig(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrlv_d_512_allbig(
-; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psrlv_d_512_undef(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psrlv_d_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
-  %2 = tail call <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32> %v, <16 x i32> %1)
-  ret <16 x i32> %2
-}
-
-define <8 x i64> @avx512_psrlv_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrlv_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrlv_q_512_var(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrlv_q_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrlv_q_512_big(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrlv_q_512_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrlv_q_512_allbig(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrlv_q_512_allbig(
-; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psrlv_q_512_undef(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psrlv_q_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
-  %2 = tail call <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64> %v, <8 x i64> %1)
-  ret <8 x i64> %2
-}
-
-define <8 x i16> @avx512_psrlv_w_128_0(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_128_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psrlv_w_128_var(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psrlv_w_128_big(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_128_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psrlv_w_128_allbig(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_128_allbig(
-; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psrlv_w_128_undef(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
-  %2 = tail call <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16> %v, <8 x i16> %1)
-  ret <8 x i16> %2
-}
-
-define <16 x i16> @avx512_psrlv_w_256_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_256_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psrlv_w_256_var(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psrlv_w_256_big(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_256_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psrlv_w_256_allbig(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_256_allbig(
-; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psrlv_w_256_undef(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
-  %2 = tail call <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16> %v, <16 x i16> %1)
-  ret <16 x i16> %2
-}
-
-define <32 x i16> @avx512_psrlv_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrlv_w_512_var(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrlv_w_512_big(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_512_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrlv_w_512_allbig(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_512_allbig(
-; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psrlv_w_512_undef(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psrlv_w_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = lshr <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
-  %2 = tail call <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16> %v, <32 x i16> %1)
-  ret <32 x i16> %2
-}
-
-;
-; SHL - Constant Per-Element Vector
-;
-
-define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_128_0(
-; CHECK-NEXT:    ret <4 x i32> %v
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_256_0(
-; CHECK-NEXT:    ret <8 x i32> %v
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
-  ret <8 x i32> %1
-}
-
-define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
-  ret <8 x i32> %1
-}
-
-define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_128_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_256_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
-  ret <8 x i32> %1
-}
-
-define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_128_allbig(
-; CHECK-NEXT:    ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
-;
-  %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
-  ret <4 x i32> %1
-}
-
-define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_256_allbig(
-; CHECK-NEXT:    ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
-  ret <8 x i32> %1
-}
-
-define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
-  %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
-  ret <4 x i32> %2
-}
-
-define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psllv_d_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
-  %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
-  ret <8 x i32> %2
-}
-
-define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_128_0(
-; CHECK-NEXT:    ret <2 x i64> %v
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_256_0(
-; CHECK-NEXT:    ret <4 x i64> %v
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 8>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_128_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_256_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_128_allbig(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
-  ret <2 x i64> %1
-}
-
-define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_256_allbig(
-; CHECK-NEXT:    ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
-;
-  %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
-  ret <4 x i64> %1
-}
-
-define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 undef>
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
-  %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
-  ret <2 x i64> %2
-}
-
-define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psllv_q_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
-  %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
-  ret <4 x i64> %2
-}
-
-define <16 x i32> @avx512_psllv_d_512_0(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psllv_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %v
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> zeroinitializer)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psllv_d_512_var(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psllv_d_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psllv_d_512_big(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psllv_d_512_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psllv_d_512_allbig(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psllv_d_512_allbig(
-; CHECK-NEXT:    ret <16 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555, i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
-  ret <16 x i32> %1
-}
-
-define <16 x i32> @avx512_psllv_d_512_undef(<16 x i32> %v) {
-; CHECK-LABEL: @avx512_psllv_d_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = insertelement <16 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0, i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
-  %2 = tail call <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32> %v, <16 x i32> %1)
-  ret <16 x i32> %2
-}
-
-define <8 x i64> @avx512_psllv_q_512_0(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psllv_q_512_0(
-; CHECK-NEXT:    ret <8 x i64> %v
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> zeroinitializer)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psllv_q_512_var(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psllv_q_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psllv_q_512_big(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psllv_q_512_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 0, i64 8, i64 16, i64 64, i64 0, i64 8, i64 16, i64 64>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psllv_q_512_allbig(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psllv_q_512_allbig(
-; CHECK-NEXT:    ret <8 x i64> <i64 0, i64 undef, i64 0, i64 0, i64 0, i64 undef, i64 0, i64 0>
-;
-  %1 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> <i64 64, i64 undef, i64 -128, i64 -60, i64 64, i64 undef, i64 -128, i64 -60>)
-  ret <8 x i64> %1
-}
-
-define <8 x i64> @avx512_psllv_q_512_undef(<8 x i64> %v) {
-; CHECK-LABEL: @avx512_psllv_q_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i64> %v, <i64 undef, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = insertelement <8 x i64> <i64 0, i64 8, i64 16, i64 31, i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
-  %2 = tail call <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64> %v, <8 x i64> %1)
-  ret <8 x i64> %2
-}
-
-define <8 x i16> @avx512_psllv_w_128_0(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_128_0(
-; CHECK-NEXT:    ret <8 x i16> %v
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> zeroinitializer)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psllv_w_128_var(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psllv_w_128_big(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_128_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 16>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psllv_w_128_allbig(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_128_allbig(
-; CHECK-NEXT:    ret <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef>
-;
-  %1 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 undef>)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @avx512_psllv_w_128_undef(<8 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_128_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <8 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = insertelement <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>, i16 undef, i64 0
-  %2 = tail call <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16> %v, <8 x i16> %1)
-  ret <8 x i16> %2
-}
-
-define <16 x i16> @avx512_psllv_w_256_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_256_0(
-; CHECK-NEXT:    ret <16 x i16> %v
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> zeroinitializer)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psllv_w_256_var(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psllv_w_256_big(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_256_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 16>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psllv_w_256_allbig(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_256_allbig(
-; CHECK-NEXT:    ret <16 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 256, i16 16, i16 28, i16 65535, i16 32767>)
-  ret <16 x i16> %1
-}
-
-define <16 x i16> @avx512_psllv_w_256_undef(<16 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_256_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <16 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = insertelement <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>, i16 undef, i64 0
-  %2 = tail call <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16> %v, <16 x i16> %1)
-  ret <16 x i16> %2
-}
-
-define <32 x i16> @avx512_psllv_w_512_0(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %v
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> zeroinitializer)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psllv_w_512_var(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> %v, <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psllv_w_512_big(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_512_big(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psllv_w_512_allbig(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_512_allbig(
-; CHECK-NEXT:    ret <32 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 undef, i16 0, i16 0, i16 undef, i16 0, i16 0>
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> <i16 20, i16 -1, i16 -2, i16 33, i16 44, i16 55, i16 66, i16 -7, i16 undef, i16 64, i16 -10, i16 128, i16 16, i16 28, i16 65535, i16 32767, i16 56, i16 -14, i16 undef, i16 16, i16 67, i16 567, i16 -32768, i16 4096, i16 8192, i16 -12345, i16 undef, i16 345, i16 123, i16 undef, i16 1024, i16 54321>)
-  ret <32 x i16> %1
-}
-
-define <32 x i16> @avx512_psllv_w_512_undef(<32 x i16> %v) {
-; CHECK-LABEL: @avx512_psllv_w_512_undef(
-; CHECK-NEXT:    [[TMP1:%.*]] = shl <32 x i16> %v, <i16 undef, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = insertelement <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>, i16 undef, i64 0
-  %2 = tail call <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16> %v, <32 x i16> %1)
-  ret <32 x i16> %2
-}
-
-;
-; Vector Demanded Bits
-;
-
-define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @sse2_psra_w_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
-  ret <8 x i16> %2
-}
-
-define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
-; CHECK-LABEL: @sse2_psra_w_var_bc(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <8 x i16>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> [[TMP1]])
-; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = bitcast <2 x i64> %1 to <8 x i16>
-  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
-  ret <8 x i16> %3
-}
-
-define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @sse2_psra_d_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
-  ret <4 x i32> %2
-}
-
-define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
-; CHECK-LABEL: @sse2_psra_d_var_bc(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <8 x i16> %a to <4 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> [[TMP1]])
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = bitcast <8 x i16> %1 to <4 x i32>
-  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
-  ret <4 x i32> %3
-}
-
-define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx2_psra_w_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
-  ret <16 x i16> %2
-}
-
-define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx2_psra_d_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
-  ret <8 x i32> %2
-}
-
-define <2 x i64> @avx512_psra_q_128_var(<2 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx512_psra_q_128_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64> %v, <2 x i64> %1)
-  ret <2 x i64> %2
-}
-
-define <4 x i64> @avx512_psra_q_256_var(<4 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx512_psra_q_256_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64> %v, <2 x i64> %1)
-  ret <4 x i64> %2
-}
-
-define <32 x i16> @avx512_psra_w_512_var(<32 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx512_psra_w_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %v, <8 x i16> %1)
-  ret <32 x i16> %2
-}
-
-define <16 x i32> @avx512_psra_d_512_var(<16 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx512_psra_d_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %v, <4 x i32> %1)
-  ret <16 x i32> %2
-}
-
-define <8 x i64> @avx512_psra_q_512_var(<8 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx512_psra_q_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64> %v, <2 x i64> %1)
-  ret <8 x i64> %2
-}
-
-define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @sse2_psrl_w_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @sse2_psrl_d_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
-  ret <4 x i32> %2
-}
-
-define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @sse2_psrl_q_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
-  ret <2 x i64> %2
-}
-
-define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx2_psrl_w_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
-  ret <16 x i16> %2
-}
-
-define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
-; CHECK-LABEL: @avx2_psrl_w_var_bc(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> [[TMP1]])
-; CHECK-NEXT:    ret <16 x i16> [[TMP2]]
-;
-  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %2 = bitcast <16 x i8> %1 to <8 x i16>
-  %3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
-  ret <16 x i16> %3
-}
-
-define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx2_psrl_d_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
-  ret <8 x i32> %2
-}
-
-define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx2_psrl_d_var_bc(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> [[TMP1]])
-; CHECK-NEXT:    ret <8 x i32> [[TMP2]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = bitcast <2 x i64> %1 to <4 x i32>
-  %3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
-  ret <8 x i32> %3
-}
-
-define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx2_psrl_q_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
-  ret <4 x i64> %2
-}
-
-define <32 x i16> @avx512_psrl_w_512_var(<32 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx512_psrl_w_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %1)
-  ret <32 x i16> %2
-}
-
-define <32 x i16> @avx512_psrl_w_512_var_bc(<32 x i16> %v, <16 x i8> %a) {
-; CHECK-LABEL: @avx512_psrl_w_512_var_bc(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> [[TMP1]])
-; CHECK-NEXT:    ret <32 x i16> [[TMP2]]
-;
-  %1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-  %2 = bitcast <16 x i8> %1 to <8 x i16>
-  %3 = tail call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %v, <8 x i16> %2)
-  ret <32 x i16> %3
-}
-
-define <16 x i32> @avx512_psrl_d_512_var(<16 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx512_psrl_d_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %1)
-  ret <16 x i32> %2
-}
-
-define <16 x i32> @avx512_psrl_d_512_var_bc(<16 x i32> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx512_psrl_d_512_var_bc(
-; CHECK-NEXT:    [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> [[TMP1]])
-; CHECK-NEXT:    ret <16 x i32> [[TMP2]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = bitcast <2 x i64> %1 to <4 x i32>
-  %3 = tail call <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32> %v, <4 x i32> %2)
-  ret <16 x i32> %3
-}
-
-define <8 x i64> @avx512_psrl_q_512_var(<8 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx512_psrl_q_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64> %v, <2 x i64> %1)
-  ret <8 x i64> %2
-}
-
-define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @sse2_psll_w_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <8 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
-  ret <8 x i16> %2
-}
-
-define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @sse2_psll_d_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <4 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
-  ret <4 x i32> %2
-}
-
-define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @sse2_psll_q_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <2 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
-  ret <2 x i64> %2
-}
-
-define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx2_psll_w_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <16 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
-  ret <16 x i16> %2
-}
-
-define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx2_psll_d_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <8 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
-  ret <8 x i32> %2
-}
-
-define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx2_psll_q_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
-  ret <4 x i64> %2
-}
-
-define <32 x i16> @avx512_psll_w_512_var(<32 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx512_psll_w_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT:    ret <32 x i16> [[TMP1]]
-;
-  %1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-  %2 = tail call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %v, <8 x i16> %1)
-  ret <32 x i16> %2
-}
-
-define <16 x i32> @avx512_psll_d_512_var(<16 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx512_psll_d_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT:    ret <16 x i32> [[TMP1]]
-;
-  %1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-  %2 = tail call <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32> %v, <4 x i32> %1)
-  ret <16 x i32> %2
-}
-
-define <8 x i64> @avx512_psll_q_512_var(<8 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx512_psll_q_512_var(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT:    ret <8 x i64> [[TMP1]]
-;
-  %1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
-  %2 = tail call <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64> %v, <2 x i64> %1)
-  ret <8 x i64> %2
-}
-
-;
-; Constant Folding
-;
-
-define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
-; CHECK-LABEL: @test_sse2_psra_w_0(
-; CHECK-NEXT:    ret <8 x i16> %A
-;
-  %1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
-  %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
-  %3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
-  ret <8 x i16> %3
-}
-
-define <8 x i16> @test_sse2_psra_w_8() {
-; CHECK-LABEL: @test_sse2_psra_w_8(
-; CHECK-NEXT:    ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
-;
-  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
-  %2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
-  %3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
-  %4 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %3, i32 2)
-  ret <8 x i16> %4
-}
-
-define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
-; CHECK-LABEL: @test_sse2_psra_d_0(
-; CHECK-NEXT:    ret <4 x i32> %A
-;
-  %1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
-  %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
-  %3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0)
-  ret <4 x i32> %3
-}
-
-define <4 x i32> @sse2_psra_d_8() {
-; CHECK-LABEL: @sse2_psra_d_8(
-; CHECK-NEXT:    ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
-;
-  %1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
-  %2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
-  %3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
-  %4 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %3, i32 2)
-  ret <4 x i32> %4
-}
-
-define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
-; CHECK-LABEL: @test_avx2_psra_w_0(
-; CHECK-NEXT:    ret <16 x i16> %A
-;
-  %1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
-  %2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
-  %3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
-  ret <16 x i16> %3
-}
-
-define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
-; CHECK-LABEL: @test_avx2_psra_w_8(
-; CHECK-NEXT:    ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
-;
-  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
-  %2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
-  %3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
-  %4 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %3, i32 2)
-  ret <16 x i16> %4
-}
-
-define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
-; CHECK-LABEL: @test_avx2_psra_d_0(
-; CHECK-NEXT:    ret <8 x i32> %A
-;
-  %1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
-  %2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
-  %3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
-  ret <8 x i32> %3
-}
-
-define <8 x i32> @test_avx2_psra_d_8() {
-; CHECK-LABEL: @test_avx2_psra_d_8(
-; CHECK-NEXT:    ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
-;
-  %1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
-  %2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
-  %3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
-  %4 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %3, i32 2)
-  ret <8 x i32> %4
-}
-
-define <32 x i16> @test_avx512_psra_w_512_0(<32 x i16> %A) {
-; CHECK-LABEL: @test_avx512_psra_w_512_0(
-; CHECK-NEXT:    ret <32 x i16> %A
-;
-  %1 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %A, i32 0)
-  %2 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
-  %3 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %2, i32 0)
-  ret <32 x i16> %3
-}
-
-define <32 x i16> @test_avx512_psra_w_512_8(<32 x i16> %A) {
-; CHECK-LABEL: @test_avx512_psra_w_512_8(
-; CHECK-NEXT:    ret <32 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
-;
-  %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <32 x i16>
-  %2 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %1, i32 3)
-  %3 = tail call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
-  %4 = tail call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %3, i32 2)
-  ret <32 x i16> %4
-}
-
-define <16 x i32> @test_avx512_psra_d_512_0(<16 x i32> %A) {
-; CHECK-LABEL: @test_avx512_psra_d_512_0(
-; CHECK-NEXT:    ret <16 x i32> %A
-;
-  %1 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %A, i32 0)
-  %2 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
-  %3 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %2, i32 0)
-  ret <16 x i32> %3
-}
-
-define <16 x i32> @test_avx512_psra_d_512_8() {
-; CHECK-LABEL: @test_avx512_psra_d_512_8(
-; CHECK-NEXT:    ret <16 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
-;
-  %1 = bitcast <8 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i32>
-  %2 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %1, i32 3)
-  %3 = tail call <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
-  %4 = tail call <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32> %3, i32 2)
-  ret <16 x i32> %4
-}
-
-;
-; Old Tests
-;
-
-define <2 x i64> @test_sse2_1() {
-; CHECK-LABEL: @test_sse2_1(
-; CHECK-NEXT:    ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
-;
-  %S = bitcast i32 1 to i32
-  %1 = zext i32 %S to i64
-  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
-  %3 = insertelement <2 x i64> %2, i64 0, i32 1
-  %4 = bitcast <2 x i64> %3 to <8 x i16>
-  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
-  %6 = bitcast <8 x i16> %5 to <4 x i32>
-  %7 = bitcast <2 x i64> %3 to <4 x i32>
-  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
-  %9 = bitcast <4 x i32> %8 to <2 x i64>
-  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
-  %11 = bitcast <2 x i64> %10 to <8 x i16>
-  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
-  %13 = bitcast <8 x i16> %12 to <4 x i32>
-  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
-  %15 = bitcast <4 x i32> %14 to <2 x i64>
-  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
-  ret <2 x i64> %16
-}
-
-define <4 x i64> @test_avx2_1() {
-; CHECK-LABEL: @test_avx2_1(
-; CHECK-NEXT:    ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
-;
-  %S = bitcast i32 1 to i32
-  %1 = zext i32 %S to i64
-  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
-  %3 = insertelement <2 x i64> %2, i64 0, i32 1
-  %4 = bitcast <2 x i64> %3 to <8 x i16>
-  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
-  %6 = bitcast <16 x i16> %5 to <8 x i32>
-  %7 = bitcast <2 x i64> %3 to <4 x i32>
-  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
-  %9 = bitcast <8 x i32> %8 to <4 x i64>
-  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
-  %11 = bitcast <4 x i64> %10 to <16 x i16>
-  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
-  %13 = bitcast <16 x i16> %12 to <8 x i32>
-  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
-  %15 = bitcast <8 x i32> %14 to <4 x i64>
-  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
-  ret <4 x i64> %16
-}
-
-define <2 x i64> @test_sse2_0() {
-; CHECK-LABEL: @test_sse2_0(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %S = bitcast i32 128 to i32
-  %1 = zext i32 %S to i64
-  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
-  %3 = insertelement <2 x i64> %2, i64 0, i32 1
-  %4 = bitcast <2 x i64> %3 to <8 x i16>
-  %5 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>, <8 x i16> %4)
-  %6 = bitcast <8 x i16> %5 to <4 x i32>
-  %7 = bitcast <2 x i64> %3 to <4 x i32>
-  %8 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %6, <4 x i32> %7)
-  %9 = bitcast <4 x i32> %8 to <2 x i64>
-  %10 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %9, <2 x i64> %3)
-  %11 = bitcast <2 x i64> %10 to <8 x i16>
-  %12 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %11, i32 %S)
-  %13 = bitcast <8 x i16> %12 to <4 x i32>
-  %14 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %13, i32 %S)
-  %15 = bitcast <4 x i32> %14 to <2 x i64>
-  %16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
-  ret <2 x i64> %16
-}
-
-define <4 x i64> @test_avx2_0() {
-; CHECK-LABEL: @test_avx2_0(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %S = bitcast i32 128 to i32
-  %1 = zext i32 %S to i64
-  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
-  %3 = insertelement <2 x i64> %2, i64 0, i32 1
-  %4 = bitcast <2 x i64> %3 to <8 x i16>
-  %5 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> <i16 1, i16 0, i16 0, i16 0, i16 2, i16 0, i16 0, i16 0, i16 3, i16 0, i16 0, i16 0, i16 4, i16 0, i16 0, i16 0>, <8 x i16> %4)
-  %6 = bitcast <16 x i16> %5 to <8 x i32>
-  %7 = bitcast <2 x i64> %3 to <4 x i32>
-  %8 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %6, <4 x i32> %7)
-  %9 = bitcast <8 x i32> %8 to <4 x i64>
-  %10 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %9, <2 x i64> %3)
-  %11 = bitcast <4 x i64> %10 to <16 x i16>
-  %12 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %11, i32 %S)
-  %13 = bitcast <16 x i16> %12 to <8 x i32>
-  %14 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %13, i32 %S)
-  %15 = bitcast <8 x i32> %14 to <4 x i64>
-  %16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
-  ret <4 x i64> %16
-}
-define <2 x i64> @test_sse2_psrl_1() {
-; CHECK-LABEL: @test_sse2_psrl_1(
-; CHECK-NEXT:    ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
-;
-  %S = bitcast i32 1 to i32
-  %1 = zext i32 %S to i64
-  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
-  %3 = insertelement <2 x i64> %2, i64 0, i32 1
-  %4 = bitcast <2 x i64> %3 to <8 x i16>
-  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 16, i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048>, <8 x i16> %4)
-  %6 = bitcast <8 x i16> %5 to <4 x i32>
-  %7 = bitcast <2 x i64> %3 to <4 x i32>
-  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
-  %9 = bitcast <4 x i32> %8 to <2 x i64>
-  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
-  %11 = bitcast <2 x i64> %10 to <8 x i16>
-  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
-  %13 = bitcast <8 x i16> %12 to <4 x i32>
-  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
-  %15 = bitcast <4 x i32> %14 to <2 x i64>
-  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
-  ret <2 x i64> %16
-}
-
-define <4 x i64> @test_avx2_psrl_1() {
-; CHECK-LABEL: @test_avx2_psrl_1(
-; CHECK-NEXT:    ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
-;
-  %S = bitcast i32 1 to i32
-  %1 = zext i32 %S to i64
-  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
-  %3 = insertelement <2 x i64> %2, i64 0, i32 1
-  %4 = bitcast <2 x i64> %3 to <8 x i16>
-  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
-  %6 = bitcast <16 x i16> %5 to <8 x i32>
-  %7 = bitcast <2 x i64> %3 to <4 x i32>
-  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
-  %9 = bitcast <8 x i32> %8 to <4 x i64>
-  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
-  %11 = bitcast <4 x i64> %10 to <16 x i16>
-  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
-  %13 = bitcast <16 x i16> %12 to <8 x i32>
-  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
-  %15 = bitcast <8 x i32> %14 to <4 x i64>
-  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
-  ret <4 x i64> %16
-}
-
-define <2 x i64> @test_sse2_psrl_0() {
-; CHECK-LABEL: @test_sse2_psrl_0(
-; CHECK-NEXT:    ret <2 x i64> zeroinitializer
-;
-  %S = bitcast i32 128 to i32
-  %1 = zext i32 %S to i64
-  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
-  %3 = insertelement <2 x i64> %2, i64 0, i32 1
-  %4 = bitcast <2 x i64> %3 to <8 x i16>
-  %5 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> <i16 32, i16 64, i16 128, i16 256, i16 512, i16 1024, i16 2048, i16 4096>, <8 x i16> %4)
-  %6 = bitcast <8 x i16> %5 to <4 x i32>
-  %7 = bitcast <2 x i64> %3 to <4 x i32>
-  %8 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %6, <4 x i32> %7)
-  %9 = bitcast <4 x i32> %8 to <2 x i64>
-  %10 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %9, <2 x i64> %3)
-  %11 = bitcast <2 x i64> %10 to <8 x i16>
-  %12 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %11, i32 %S)
-  %13 = bitcast <8 x i16> %12 to <4 x i32>
-  %14 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %13, i32 %S)
-  %15 = bitcast <4 x i32> %14 to <2 x i64>
-  %16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
-  ret <2 x i64> %16
-}
-
-define <4 x i64> @test_avx2_psrl_0() {
-; CHECK-LABEL: @test_avx2_psrl_0(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %S = bitcast i32 128 to i32
-  %1 = zext i32 %S to i64
-  %2 = insertelement <2 x i64> undef, i64 %1, i32 0
-  %3 = insertelement <2 x i64> %2, i64 0, i32 1
-  %4 = bitcast <2 x i64> %3 to <8 x i16>
-  %5 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> <i16 1024, i16 0, i16 0, i16 0, i16 2048, i16 0, i16 0, i16 0, i16 4096, i16 0, i16 0, i16 0, i16 8192, i16 0, i16 0, i16 0>, <8 x i16> %4)
-  %6 = bitcast <16 x i16> %5 to <8 x i32>
-  %7 = bitcast <2 x i64> %3 to <4 x i32>
-  %8 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %6, <4 x i32> %7)
-  %9 = bitcast <8 x i32> %8 to <4 x i64>
-  %10 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %9, <2 x i64> %3)
-  %11 = bitcast <4 x i64> %10 to <16 x i16>
-  %12 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %11, i32 %S)
-  %13 = bitcast <16 x i16> %12 to <8 x i32>
-  %14 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %13, i32 %S)
-  %15 = bitcast <8 x i32> %14 to <4 x i64>
-  %16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
-  ret <4 x i64> %16
-}
-
-declare <8 x i64> @llvm.x86.avx512.pslli.q.512(<8 x i64>, i32) #1
-declare <16 x i32> @llvm.x86.avx512.pslli.d.512(<16 x i32>, i32) #1
-declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) #1
-declare <8 x i64> @llvm.x86.avx512.psll.q.512(<8 x i64>, <2 x i64>) #1
-declare <16 x i32> @llvm.x86.avx512.psll.d.512(<16 x i32>, <4 x i32>) #1
-declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) #1
-declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
-declare <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32>, i32) #1
-declare <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16>, i32) #1
-declare <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64>, <2 x i64>) #1
-declare <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32>, <4 x i32>) #1
-declare <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16>, <8 x i16>) #1
-declare <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64>, i32) #1
-declare <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32>, i32) #1
-declare <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16>, i32) #1
-declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) #1
-declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) #1
-
-declare <8 x i64> @llvm.x86.avx512.psrli.q.512(<8 x i64>, i32) #1
-declare <16 x i32> @llvm.x86.avx512.psrli.d.512(<16 x i32>, i32) #1
-declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) #1
-declare <8 x i64> @llvm.x86.avx512.psrl.q.512(<8 x i64>, <2 x i64>) #1
-declare <16 x i32> @llvm.x86.avx512.psrl.d.512(<16 x i32>, <4 x i32>) #1
-declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) #1
-declare <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64>, i32) #1
-declare <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32>, i32) #1
-declare <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16>, i32) #1
-declare <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64>, <2 x i64>) #1
-declare <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32>, <4 x i32>) #1
-declare <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16>, <8 x i16>) #1
-declare <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64>, i32) #1
-declare <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32>, i32) #1
-declare <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16>, i32) #1
-declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) #1
-declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) #1
-
-declare <8 x i64> @llvm.x86.avx512.psrai.q.512(<8 x i64>, i32) #1
-declare <16 x i32> @llvm.x86.avx512.psrai.d.512(<16 x i32>, i32) #1
-declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) #1
-declare <8 x i64> @llvm.x86.avx512.psra.q.512(<8 x i64>, <2 x i64>) #1
-declare <16 x i32> @llvm.x86.avx512.psra.d.512(<16 x i32>, <4 x i32>) #1
-declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) #1
-declare <4 x i64> @llvm.x86.avx512.psrai.q.256(<4 x i64>, i32) #1
-declare <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32>, i32) #1
-declare <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16>, i32) #1
-declare <4 x i64> @llvm.x86.avx512.psra.q.256(<4 x i64>, <2 x i64>) #1
-declare <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32>, <4 x i32>) #1
-declare <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16>, <8 x i16>) #1
-declare <2 x i64> @llvm.x86.avx512.psrai.q.128(<2 x i64>, i32) #1
-declare <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32>, i32) #1
-declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
-declare <2 x i64> @llvm.x86.avx512.psra.q.128(<2 x i64>, <2 x i64>) #1
-declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
-
-declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
-declare <16 x i32> @llvm.x86.avx512.psrav.d.512(<16 x i32>, <16 x i32>) #1
-declare <2 x i64> @llvm.x86.avx512.psrav.q.128(<2 x i64>, <2 x i64>) #1
-declare <4 x i64> @llvm.x86.avx512.psrav.q.256(<4 x i64>, <4 x i64>) #1
-declare <8 x i64> @llvm.x86.avx512.psrav.q.512(<8 x i64>, <8 x i64>) #1
-
-declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
-declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
-declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
-declare <16 x i32> @llvm.x86.avx512.psrlv.d.512(<16 x i32>, <16 x i32>) #1
-declare <8 x i64> @llvm.x86.avx512.psrlv.q.512(<8 x i64>, <8 x i64>) #1
-
-declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
-declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
-declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
-declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
-declare <16 x i32> @llvm.x86.avx512.psllv.d.512(<16 x i32>, <16 x i32>) #1
-declare <8 x i64> @llvm.x86.avx512.psllv.q.512(<8 x i64>, <8 x i64>) #1
-
-declare <8 x i16> @llvm.x86.avx512.psrav.w.128(<8 x i16>, <8 x i16>) #1
-declare <16 x i16> @llvm.x86.avx512.psrav.w.256(<16 x i16>, <16 x i16>) #1
-declare <32 x i16> @llvm.x86.avx512.psrav.w.512(<32 x i16>, <32 x i16>) #1
-declare <8 x i16> @llvm.x86.avx512.psrlv.w.128(<8 x i16>, <8 x i16>) #1
-declare <16 x i16> @llvm.x86.avx512.psrlv.w.256(<16 x i16>, <16 x i16>) #1
-declare <32 x i16> @llvm.x86.avx512.psrlv.w.512(<32 x i16>, <32 x i16>) #1
-declare <8 x i16> @llvm.x86.avx512.psllv.w.128(<8 x i16>, <8 x i16>) #1
-declare <16 x i16> @llvm.x86.avx512.psllv.w.256(<16 x i16>, <16 x i16>) #1
-declare <32 x i16> @llvm.x86.avx512.psllv.w.512(<32 x i16>, <32 x i16>) #1
-
-attributes #1 = { nounwind readnone }

Removed: llvm/trunk/test/Transforms/InstCombine/x86-vperm2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-vperm2.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-vperm2.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-vperm2.ll (removed)
@@ -1,313 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; This should never happen, but make sure we don't crash handling a non-constant immediate byte.
-
-define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
-; CHECK-LABEL: @perm2pd_non_const_imm(
-; CHECK-NEXT:    [[RES:%.*]] = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
-; CHECK-NEXT:    ret <4 x double> [[RES]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
-  ret <4 x double> %res
-
-}
-
-
-; In the following 4 tests, both zero mask bits of the immediate are set.
-
-define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x88(
-; CHECK-NEXT:    ret <4 x double> zeroinitializer
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
-  ret <4 x double> %res
-
-}
-
-define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
-; CHECK-LABEL: @perm2ps_0x88(
-; CHECK-NEXT:    ret <8 x float> zeroinitializer
-;
-  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
-  ret <8 x float> %res
-
-}
-
-define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @perm2si_0x88(
-; CHECK-NEXT:    ret <8 x i32> zeroinitializer
-;
-  %res = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
-  ret <8 x i32> %res
-
-}
-
-define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
-; CHECK-LABEL: @perm2i_0x88(
-; CHECK-NEXT:    ret <4 x i64> zeroinitializer
-;
-  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
-  ret <4 x i64> %res
-
-}
-
-
-; The other control bits are ignored when zero mask bits of the immediate are set.
-
-define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0xff(
-; CHECK-NEXT:    ret <4 x double> zeroinitializer
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
-  ret <4 x double> %res
-
-}
-
-
-; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
-; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible..
-
-define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x00(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x01(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x02(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x03(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x10(
-; CHECK-NEXT:    ret <4 x double> %a0
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x11(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x12(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x13(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x20(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x21(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x22(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x23(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x30(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x31(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x32(
-; CHECK-NEXT:    ret <4 x double> %a1
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x33(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
-  ret <4 x double> %res
-
-}
-
-; Confirm that a mask for 32-bit elements is also correct.
-
-define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
-; CHECK-LABEL: @perm2ps_0x31(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
-  ret <8 x float> %res
-
-}
-
-
-; Confirm that the AVX2 version works the same.
-
-define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
-; CHECK-LABEL: @perm2i_0x33(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
-  ret <4 x i64> %res
-
-}
-
-
-; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
-
-define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x81(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x83(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a1, <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x28(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
-  ret <4 x double> %res
-
-}
-
-define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
-; CHECK-LABEL: @perm2pd_0x08(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> <double 0.000000e+00, double 0.000000e+00, double undef, double undef>, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
-  ret <4 x double> %res
-
-}
-
-; Check one more with the AVX2 version.
-
-define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
-; CHECK-LABEL: @perm2i_0x28(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> <i64 0, i64 0, i64 undef, i64 undef>, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
-; CHECK-NEXT:    ret <4 x i64> [[TMP1]]
-;
-  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
-  ret <4 x i64> %res
-
-}
-
-declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
-declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
-declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
-declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone
-

Removed: llvm/trunk/test/Transforms/InstCombine/x86-vpermil.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-vpermil.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-vpermil.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-vpermil.ll (removed)
@@ -1,298 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-
-; Verify that instcombine is able to fold identity shuffles.
-
-define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) {
-; CHECK-LABEL: @identity_test_vpermilvar_ps(
-; CHECK-NEXT:    ret <4 x float> %v
-;
-  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
-  ret <4 x float> %a
-}
-
-define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) {
-; CHECK-LABEL: @identity_test_vpermilvar_ps_256(
-; CHECK-NEXT:    ret <8 x float> %v
-;
-  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
-  ret <8 x float> %a
-}
-
-define <16 x float> @identity_test_vpermilvar_ps_512(<16 x float> %v) {
-; CHECK-LABEL: @identity_test_vpermilvar_ps_512(
-; CHECK-NEXT:    ret <16 x float> %v
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>)
-  ret <16 x float> %a
-}
-
-define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) {
-; CHECK-LABEL: @identity_test_vpermilvar_pd(
-; CHECK-NEXT:    ret <2 x double> %v
-;
-  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 0, i64 2>)
-  ret <2 x double> %a
-}
-
-define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) {
-; CHECK-LABEL: @identity_test_vpermilvar_pd_256(
-; CHECK-NEXT:    ret <4 x double> %v
-;
-  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 0, i64 2, i64 0, i64 2>)
-  ret <4 x double> %a
-}
-
-define <8 x double> @identity_test_vpermilvar_pd_512(<8 x double> %v) {
-; CHECK-LABEL: @identity_test_vpermilvar_pd_512(
-; CHECK-NEXT:    ret <8 x double> %v
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 0, i64 2, i64 0, i64 2, i64 0, i64 2, i64 0, i64 2>)
-  ret <8 x double> %a
-}
-
-; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
-; with a shuffle mask of all zeroes.
-
-define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) {
-; CHECK-LABEL: @zero_test_vpermilvar_ps_zero(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
-  ret <4 x float> %a
-}
-
-define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) {
-; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
-  ret <8 x float> %a
-}
-
-define <16 x float> @zero_test_vpermilvar_ps_512_zero(<16 x float> %v) {
-; CHECK-LABEL: @zero_test_vpermilvar_ps_512_zero(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> zeroinitializer)
-  ret <16 x float> %a
-}
-
-define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) {
-; CHECK-LABEL: @zero_test_vpermilvar_pd_zero(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
-  ret <2 x double> %a
-}
-
-define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) {
-; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
-  ret <4 x double> %a
-}
-
-define <8 x double> @zero_test_vpermilvar_pd_512_zero(<8 x double> %v) {
-; CHECK-LABEL: @zero_test_vpermilvar_pd_512_zero(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> zeroinitializer)
-  ret <8 x double> %a
-}
-
-; Verify that instcombine is able to fold constant shuffles.
-
-define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
-; CHECK-LABEL: @test_vpermilvar_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
-  ret <4 x float> %a
-}
-
-define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
-; CHECK-LABEL: @test_vpermilvar_ps_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
-  ret <8 x float> %a
-}
-
-define <16 x float> @test_vpermilvar_ps_512(<16 x float> %v) {
-; CHECK-LABEL: @test_vpermilvar_ps_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
-  ret <16 x float> %a
-}
-
-define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
-; CHECK-LABEL: @test_vpermilvar_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
-  ret <2 x double> %a
-}
-
-define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
-; CHECK-LABEL: @test_vpermilvar_pd_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
-  ret <4 x double> %a
-}
-
-define <8 x double> @test_vpermilvar_pd_512(<8 x double> %v) {
-; CHECK-LABEL: @test_vpermilvar_pd_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 3, i64 1, i64 2, i64 0, i64 7, i64 5, i64 6, i64 4>)
-  ret <8 x double> %a
-}
-
-; Verify that instcombine is able to fold constant shuffles with undef mask elements.
-
-define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) {
-; CHECK-LABEL: @undef_test_vpermilvar_ps(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>)
-  ret <4 x float> %a
-}
-
-define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) {
-; CHECK-LABEL: @undef_test_vpermilvar_ps_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
-  ret <8 x float> %a
-}
-
-define <16 x float> @undef_test_vpermilvar_ps_512(<16 x float> %v) {
-; CHECK-LABEL: @undef_test_vpermilvar_ps_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <16 x float> %v, <16 x float> undef, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4, i32 undef, i32 10, i32 9, i32 undef, i32 15, i32 14, i32 13, i32 12>
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0, i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
-  ret <16 x float> %a
-}
-
-define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) {
-; CHECK-LABEL: @undef_test_vpermilvar_pd(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>)
-  ret <2 x double> %a
-}
-
-define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) {
-; CHECK-LABEL: @undef_test_vpermilvar_pd_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>)
-  ret <4 x double> %a
-}
-
-define <8 x double> @undef_test_vpermilvar_pd_512(<8 x double> %v) {
-; CHECK-LABEL: @undef_test_vpermilvar_pd_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x double> %v, <8 x double> undef, <8 x i32> <i32 undef, i32 0, i32 3, i32 undef, i32 undef, i32 4, i32 7, i32 undef>
-; CHECK-NEXT:    ret <8 x double> [[TMP1]]
-;
-  %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 undef, i64 1, i64 2, i64 undef, i64 undef, i64 1, i64 2, i64 undef>)
-  ret <8 x double> %a
-}
-
-; Simplify demanded elts
-
-define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) {
-; CHECK-LABEL: @elts_test_vpermilvar_ps(
-; CHECK-NEXT:    ret <4 x float> %a0
-;
-  %1 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %a1, i32 3
-  %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1)
-  %3 = shufflevector <4 x float> %2, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
-  ret <4 x float> %3
-}
-
-define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
-; CHECK-LABEL: @elts_test_vpermilvar_ps_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 6, i32 undef, i32 7>
-; CHECK-NEXT:    ret <8 x float> [[TMP1]]
-;
-  %1 = shufflevector <8 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 2, i32 1, i32 0>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
-  %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
-  %3 = shufflevector <8 x float> %2, <8 x float> undef, <8 x i32> <i32 undef, i32 1, i32 undef, i32 3, i32 undef, i32 5, i32 undef, i32 7>
-  ret <8 x float> %3
-}
-
-define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) {
-; CHECK-LABEL: @elts_test_vpermilvar_ps_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %a1)
-; CHECK-NEXT:    ret <16 x float> [[TMP1]]
-;
-  %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0
-  %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1)
-  %3 = shufflevector <16 x float> %2, <16 x float> undef, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-  ret <16 x float> %3
-}
-
-define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) {
-; CHECK-LABEL: @elts_test_vpermilvar_pd(
-; CHECK-NEXT:    ret <2 x double> %a0
-;
-  %1 = insertelement <2 x i64> <i64 0, i64 2>, i64 %a1, i32 1
-  %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1)
-  %3 = shufflevector <2 x double> %2, <2 x double> undef, <2 x i32> <i32 0, i32 undef>
-  ret <2 x double> %3
-}
-
-define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
-; CHECK-LABEL: @elts_test_vpermilvar_pd_256(
-; CHECK-NEXT:    [[TMP1:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 undef>
-; CHECK-NEXT:    ret <4 x double> [[TMP1]]
-;
-  %1 = shufflevector <4 x i64> <i64 0, i64 2, i64 0, i64 2>, <4 x i64> %a1, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
-  %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %1)
-  %3 = shufflevector <4 x double> %2, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
-  ret <4 x double> %3
-}
-
-define <8 x double> @elts_test_vpermilvar_pd_512(<8 x double> %a0, <8 x i64> %a1, i64 %a2) {
-; CHECK-LABEL: @elts_test_vpermilvar_pd_512(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <8 x i64> undef, i64 %a2, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> undef, <8 x i32> zeroinitializer
-; CHECK-NEXT:    ret <8 x double> [[TMP3]]
-;
-  %1 = insertelement <8 x i64> %a1, i64 %a2, i32 0
-  %2 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %1)
-  %3 = shufflevector <8 x double> %2, <8 x double> undef, <8 x i32> zeroinitializer
-  ret <8 x double> %3
-}
-
-declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
-declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
-declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
-
-declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
-declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
-declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)

Removed: llvm/trunk/test/Transforms/InstCombine/x86-xop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/x86-xop.ll?rev=302978&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/x86-xop.ll (original)
+++ llvm/trunk/test/Transforms/InstCombine/x86-xop.ll (removed)
@@ -1,305 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define <2 x double> @test_vfrcz_sd(<2 x double> %a) {
-; CHECK-LABEL: @test_vfrcz_sd(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %a)
-; CHECK-NEXT:    ret <2 x double> [[TMP1]]
-;
-  %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
-  %2 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %1)
-  ret <2 x double> %2
-}
-
-define double @test_vfrcz_sd_0(double %a) {
-; CHECK-LABEL: @test_vfrcz_sd_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
-; CHECK-NEXT:    ret double [[TMP3]]
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
-  %4 = extractelement <2 x double> %3, i32 0
-  ret double %4
-}
-
-define double @test_vfrcz_sd_1(double %a) {
-; CHECK-LABEL: @test_vfrcz_sd_1(
-; CHECK-NEXT:    ret double 0.000000e+00
-;
-  %1 = insertelement <2 x double> undef, double %a, i32 0
-  %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
-  %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
-  %4 = extractelement <2 x double> %3, i32 1
-  ret double %4
-}
-
-define <4 x float> @test_vfrcz_ss(<4 x float> %a) {
-; CHECK-LABEL: @test_vfrcz_ss(
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %a)
-; CHECK-NEXT:    ret <4 x float> [[TMP1]]
-;
-  %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
-  %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
-  %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
-  %4 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %3)
-  ret <4 x float> %4
-}
-
-define float @test_vfrcz_ss_0(float %a) {
-; CHECK-LABEL: @test_vfrcz_ss_0(
-; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
-; CHECK-NEXT:    [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
-; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
-; CHECK-NEXT:    ret float [[TMP3]]
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
-  %6 = extractelement <4 x float> %5, i32 0
-  ret float %6
-}
-
-define float @test_vfrcz_ss_3(float %a) {
-; CHECK-LABEL: @test_vfrcz_ss_3(
-; CHECK-NEXT:    ret float 0.000000e+00
-;
-  %1 = insertelement <4 x float> undef, float %a, i32 0
-  %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
-  %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
-  %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
-  %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
-  %6 = extractelement <4 x float> %5, i32 3
-  ret float %6
-}
-
-define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_slt_v2i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_ult_v2i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_sle_v2i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i64> %1
-}
-
-define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_ule_v2i64(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
-; CHECK-NEXT:    ret <2 x i64> [[TMP2]]
-;
-  %1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
-  ret <2 x i64> %1
-}
-
-define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_sgt_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_ugt_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_sge_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %1
-}
-
-define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_uge_v4i32(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
-; CHECK-NEXT:    ret <4 x i32> [[TMP2]]
-;
-  %1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
-  ret <4 x i32> %1
-}
-
-define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_seq_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_ueq_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_sne_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i16> %1
-}
-
-define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_une_v8i16(
-; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
-; CHECK-NEXT:    [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
-; CHECK-NEXT:    ret <8 x i16> [[TMP2]]
-;
-  %1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
-  ret <8 x i16> %1
-}
-
-define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_strue_v16i8(
-; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-;
-  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_utrue_v16i8(
-; CHECK-NEXT:    ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
-;
-  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_sfalse_v16i8(
-; CHECK-NEXT:    ret <16 x i8> zeroinitializer
-;
-  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
-  ret <16 x i8> %1
-}
-
-define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_ufalse_v16i8(
-; CHECK-NEXT:    ret <16 x i8> zeroinitializer
-;
-  %1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
-  ret <16 x i8> %1
-}
-
-declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
-declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
-
-declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64>, <2 x i64>) nounwind readnone
-declare <16 x i8> @llvm.x86.xop.vpcomltub(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomltuw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomltud(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.x86.xop.vpcomleb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomlew(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomled(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64>, <2 x i64>) nounwind readnone
-declare <16 x i8> @llvm.x86.xop.vpcomleub(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomleuw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomleud(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.x86.xop.vpcomgtb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomgtw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomgtq(<2 x i64>, <2 x i64>) nounwind readnone
-declare <16 x i8> @llvm.x86.xop.vpcomgtub(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomgtuw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomgtuq(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.x86.xop.vpcomgeb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomgew(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomgeq(<2 x i64>, <2 x i64>) nounwind readnone
-declare <16 x i8> @llvm.x86.xop.vpcomgeub(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomgeuw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomgeuq(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.x86.xop.vpcomeqb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomeqd(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomeqq(<2 x i64>, <2 x i64>) nounwind readnone
-declare <16 x i8> @llvm.x86.xop.vpcomequb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomequd(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomequq(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.x86.xop.vpcomneb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomned(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomneq(<2 x i64>, <2 x i64>) nounwind readnone
-declare <16 x i8> @llvm.x86.xop.vpcomneub(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomneud(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomneuq(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomfalsew(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomfalsed(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomfalseq(<2 x i64>, <2 x i64>) nounwind readnone
-declare <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomfalseuw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomfalseud(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomfalseuq(<2 x i64>, <2 x i64>) nounwind readnone
-
-declare <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8>, <16 x i8>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomtrued(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomtrueq(<2 x i64>, <2 x i64>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomtruew(<8 x i16>, <8 x i16>) nounwind readnone
-declare <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8>, <16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.xop.vpcomtrueuw(<8 x i16>, <8 x i16>) nounwind readnone
-declare <4 x i32> @llvm.x86.xop.vpcomtrueud(<4 x i32>, <4 x i32>) nounwind readnone
-declare <2 x i64> @llvm.x86.xop.vpcomtrueuq(<2 x i64>, <2 x i64>) nounwind readnone




More information about the llvm-commits mailing list