[llvm] r283717 - [x86][inline-asm][llvm] accept 'v' constraint

Sun Oct 9 22:48:57 PDT 2016

Author: mzuckerm
Date: Mon Oct 10 00:48:56 2016
New Revision: 283717

URL: http://llvm.org/viewvc/llvm-project?rev=283717&view=rev
Log:
[x86][inline-asm][llvm] accept 'v' constraint

Committed on behalf of: Coby Tayree
1. On (x86) non-AVX-512 architectures, the 'v' constraint imitates the already-implemented 'x' constraint, i.e. it allows XMM{0-15} & YMM{0-15}, depending on the target architecture & mode (32/64).
2. On AVX-512 architectures, it allows [X,Y,Z]MM{0-31} (mode dependent).

This patch applies the needed changes to LLVM.
 Companion clang patch: https://reviews.llvm.org/D25004

Differential Revision: D25005
 

Added:
    llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint-32bit.ll
    llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint.ll
    llvm/trunk/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll
    llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint-32bit.ll
    llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint.ll
Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=283717&r1=283716&r2=283717&view=diff
==============================================================================

--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Oct 10 00:48:56 2016
@@ -32024,6 +32024,7 @@ X86TargetLowering::getConstraintType(Str
     case 'u':
     case 'y':
     case 'x':
+    case 'v':
     case 'Y':
     case 'l':
       return C_RegisterClass;
@@ -32093,6 +32094,10 @@ TargetLowering::ConstraintWeight
     if (type->isX86_MMXTy() && Subtarget.hasMMX())
       weight = CW_SpecificReg;
     break;
+  case 'v':
+    if ((type->getPrimitiveSizeInBits() == 512) && Subtarget.hasAVX512())
+      weight = CW_Register;
+    LLVM_FALLTHROUGH;
   case 'x':
   case 'Y':
     if (((type->getPrimitiveSizeInBits() == 128) && Subtarget.hasSSE1()) ||
@@ -32429,17 +32434,23 @@ X86TargetLowering::getRegForInlineAsmCon
     case 'Y':   // SSE_REGS if SSE2 allowed
       if (!Subtarget.hasSSE2()) break;
       LLVM_FALLTHROUGH;
+    case 'v':
     case 'x':   // SSE_REGS if SSE1 allowed or AVX_REGS if AVX allowed
       if (!Subtarget.hasSSE1()) break;
+      bool VConstraint = (Constraint[0] == 'v');
 
       switch (VT.SimpleTy) {
       default: break;
       // Scalar SSE types.
       case MVT::f32:
       case MVT::i32:
+        if (VConstraint && Subtarget.hasAVX512() && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::FR32XRegClass);
         return std::make_pair(0U, &X86::FR32RegClass);
       case MVT::f64:
       case MVT::i64:
+        if (VConstraint && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::FR64XRegClass);
         return std::make_pair(0U, &X86::FR64RegClass);
       // TODO: Handle f128 and i128 in FR128RegClass after it is tested well.
       // Vector types.
@@ -32449,6 +32460,8 @@ X86TargetLowering::getRegForInlineAsmCon
       case MVT::v2i64:
       case MVT::v4f32:
       case MVT::v2f64:
+        if (VConstraint && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::VR128XRegClass);
         return std::make_pair(0U, &X86::VR128RegClass);
       // AVX types.
       case MVT::v32i8:
@@ -32457,6 +32470,8 @@ X86TargetLowering::getRegForInlineAsmCon
       case MVT::v4i64:
       case MVT::v8f32:
       case MVT::v4f64:
+        if (VConstraint && Subtarget.hasVLX())
+          return std::make_pair(0U, &X86::VR256XRegClass);
         return std::make_pair(0U, &X86::VR256RegClass);
       case MVT::v8f64:
       case MVT::v16f32:

Added: llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint-32bit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint-32bit.ll?rev=283717&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint-32bit.ll (added)
+++ llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint-32bit.ll Mon Oct 10 00:48:56 2016
@@ -0,0 +1,136 @@
+; RUN: not llc < %s -mtriple i386-unknown-linux-gnu -mattr +avx 1> /dev/null 2> %t
+; RUN: FileCheck %s --input-file %t
+
+define <4 x float> @testXMM_1(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_2(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "movapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_3(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_4(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vmpsadbw $$0, $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_5(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, i32 %_l)
+  ret <4 x float> %0
+}
+
+define i32 @testXMM_6(i32 returned %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  tail call void asm sideeffect "vmovd $0, %eax", "v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret i32 %_l
+}
+
+define <4 x float> @testXMM_7(<4 x float> returned %_xmm0) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  tail call void asm sideeffect "vmovmskps $0, %eax", "v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0)
+  ret <4 x float> %_xmm0
+}
+
+define i32 @testXMM_8(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call i32 asm "vmulsd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0)
+  ret i32 %0
+}
+
+define <4 x float> @testXMM_9(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_10(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "pabsb $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_11(<4 x float> %_xmm0, i32 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vpabsd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i32 %_l)
+  ret <4 x float> %0
+}
+
+define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmulps $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmulpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+

Added: llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint.ll?rev=283717&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint.ll (added)
+++ llvm/trunk/test/CodeGen/X86/inline-asm-avx-v-constraint.ll Mon Oct 10 00:48:56 2016
@@ -0,0 +1,136 @@
+; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx | FileCheck %s
+; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx512f | FileCheck %s
+
+define <4 x float> @testXMM_1(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: vmovhlps  %xmm1, %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_2(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: movapd  %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "movapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_3(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: vmovapd %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_4(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: vmpsadbw  $0, %xmm1, %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vmpsadbw $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_5(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: vminpd  %xmm0, %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, i64 %_l)
+  ret <4 x float> %0
+}
+
+define i64 @testXMM_6(i64 returned %_l)  {
+; CHECK: vmovd %xmm0, %eax
+entry:
+  tail call void asm sideeffect "vmovd $0, %eax", "v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret i64 %_l
+}
+
+define <4 x float> @testXMM_7(<4 x float> returned %_xmm0) {
+; CHECK: vmovmskps %xmm0, %eax
+entry:
+  tail call void asm sideeffect "vmovmskps $0, %rax", "v,~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0)
+  ret <4 x float> %_xmm0
+}
+
+define i64 @testXMM_8(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: vmulsd  %xmm1, %xmm0, %xmm0
+entry:
+  %0 = tail call i64 asm "vmulsd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret i64 %0
+}
+
+define <4 x float> @testXMM_9(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: vorpd %xmm1, %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_10(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: pabsb %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "pabsb $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_11(<4 x float> %_xmm0, i64 %_l)  {
+; CHECK: vpabsd  %xmm0, %xmm0
+entry:
+  %0 = tail call <4 x float> asm "vpabsd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1)  {
+; CHECK: vmovsldup %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1)  {
+; CHECK: vmovapd %ymm1, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1)  {
+; CHECK: vminpd  %ymm1, %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1)  {
+; CHECK: vorpd %ymm1, %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vorpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM(<8 x float> %_ymm0, <8 x float> %_ymm1)  {
+; CHECK: vmulps  %ymm1, %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmulps $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1)  {
+; CHECK: vmulpd  %ymm1, %ymm0, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmulpd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1)  {
+; CHECK: vmovups %ymm1, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1)  {
+; CHECK: vmovupd %ymm1, %ymm0
+entry:
+  %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+

Added: llvm/trunk/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll?rev=283717&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll (added)
+++ llvm/trunk/test/CodeGen/X86/inline-asm-avx512f-v-constraint.ll Mon Oct 10 00:48:56 2016
@@ -0,0 +1,72 @@
+; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx512f | FileCheck %s
+
+define <16 x float> @testZMM_1(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vpternlogd  $0, %zmm1, %zmm0, %zmm0
+  %0 = tail call <16 x float> asm "vpternlogd $$0, $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm0)
+  ret <16 x float> %0
+}
+
+define <16 x float> @testZMM_2(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vpabsq  %zmm1, %zmm0
+  %0 = tail call <16 x float> asm "vpabsq $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1)
+  ret <16 x float> %0
+}
+
+
+define <16 x float> @testZMM_3(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vpaddd  %zmm1, %zmm1, %zmm0
+  %0 = tail call <16 x float> asm "vpaddd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1)
+  ret <16 x float> %0
+}
+
+
+define <16 x float> @testZMM_4(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vpaddq  %zmm1, %zmm1, %zmm0
+  %0 = tail call <16 x float> asm "vpaddq $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1)
+  ret <16 x float> %0
+}
+
+
+define <16 x float> @testZMM_5(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vpandd  %zmm1, %zmm1, %zmm0
+  %0 = tail call <16 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1)
+  ret <16 x float> %0
+}
+
+
+define <16 x float> @testZMM_6(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vpandnd %zmm1, %zmm1, %zmm0
+  %0 = tail call <16 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1)
+  ret <16 x float> %0
+}
+
+
+define <16 x float> @testZMM_7(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vpmaxsd %zmm1, %zmm1, %zmm0
+  %0 = tail call <16 x float> asm "vpmaxsd $1, $2, $0", "=v,v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1, <16 x float> %_zmm1)
+  ret <16 x float> %0
+}
+
+
+define <16 x float> @testZMM_8(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vmovups %zmm1, %zmm0
+  %0 = tail call <16 x float> asm "vmovups $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1)
+  ret <16 x float> %0
+}
+
+
+define <16 x float> @testZMM_9(<16 x float> %_zmm0, <16 x float> %_zmm1) {
+entry:
+; CHECK: vmovupd %zmm1, %zmm0
+  %0 = tail call <16 x float> asm "vmovupd $1, $0", "=v,v,~{dirflag},~{fpsr},~{flags}"(<16 x float> %_zmm1)
+  ret <16 x float> %0
+}
+

Added: llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint-32bit.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint-32bit.ll?rev=283717&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint-32bit.ll (added)
+++ llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint-32bit.ll Mon Oct 10 00:48:56 2016
@@ -0,0 +1,138 @@
+; RUN: not llc < %s -mtriple i386-unknown-linux-gnu -mattr +avx512vl 1> /dev/null 2> %t
+; RUN: FileCheck %s --input-file %t
+
+define <4 x float> @testXMM_1(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+
+define <4 x float> @testXMM_2(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+
+define <4 x float> @testXMM_3(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l, i64 %_l)
+  ret <4 x float> %0
+}
+
+
+define i64 @testXMM_4(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call i64 asm "vmulsd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret i64 %0
+}
+
+
+define <4 x float> @testXMM_5(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vpabsq $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+
+define <4 x float> @testXMM_6(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0, i64 %_l)
+  ret <4 x float> %0
+}
+
+
+define <4 x float> @testXMM_7(<4 x float> %_xmm0, i64 %_l) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <4 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0, i64 %_l)
+  ret <4 x float> %0
+}
+
+
+define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vpabsq $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_5(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vpminud $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vpmaxsd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_9(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+
+define <8 x float> @testYMM_10(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+; CHECK: error: inline assembly requires more registers than available
+entry:
+  %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+

Added: llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint.ll?rev=283717&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint.ll (added)
+++ llvm/trunk/test/CodeGen/X86/inline-asm-avx512vl-v-constraint.ll Mon Oct 10 00:48:56 2016
@@ -0,0 +1,121 @@
+; RUN: llc < %s -march x86-64 -mtriple x86_64-unknown-linux-gnu -mattr +avx512vl | FileCheck %s
+
+define <4 x float> @testXMM_1(<4 x float> %_xmm0, i64 %_l) {
+entry:
+; CHECK: vmovhlps  %xmm17, %xmm16, %xmm16
+  %0 = tail call <4 x float> asm "vmovhlps $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_2(<4 x float> %_xmm0, i64 %_l) {
+entry:
+; CHECK: vmovapd %xmm16, %xmm16
+  %0 = tail call <4 x float> asm "vmovapd $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_3(<4 x float> %_xmm0, i64 %_l) {
+entry:
+; CHECK: vminpd  %xmm16, %xmm16, %xmm16
+  %0 = tail call <4 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l, i64 %_l)
+  ret <4 x float> %0
+}
+
+define i64 @testXMM_4(<4 x float> %_xmm0, i64 %_l) {
+entry:
+; CHECK: vmulsd  %xmm17, %xmm16, %xmm16
+  %0 = tail call i64 asm "vmulsd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l, <4 x float> %_xmm0)
+  ret i64 %0
+}
+
+define <4 x float> @testXMM_5(<4 x float> %_xmm0, i64 %_l) {
+entry:
+; CHECK: vpabsq  %xmm16, %xmm16
+  %0 = tail call <4 x float> asm "vpabsq $1, $0", "=v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_6(<4 x float> %_xmm0, i64 %_l) {
+entry:
+; CHECK: vpandd  %xmm16, %xmm17, %xmm16
+  %0 = tail call <4 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0, i64 %_l)
+  ret <4 x float> %0
+}
+
+define <4 x float> @testXMM_7(<4 x float> %_xmm0, i64 %_l) {
+entry:
+; CHECK: vpandnd %xmm16, %xmm17, %xmm16
+  %0 = tail call <4 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{xmm0},~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{dirflag},~{fpsr},~{flags}"(<4 x float> %_xmm0, i64 %_l)
+  ret <4 x float> %0
+}
+
+define <8 x float> @testYMM_1(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vmovsldup %ymm16, %ymm16
+  %0 = tail call <8 x float> asm "vmovsldup $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_2(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vmovapd %ymm16, %ymm16
+  %0 = tail call <8 x float> asm "vmovapd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_3(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vminpd  %ymm16, %ymm16, %ymm16
+  %0 = tail call <8 x float> asm "vminpd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_4(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vpabsq  %ymm16, %ymm16
+  %0 = tail call <8 x float> asm "vpabsq $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_5(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vpandd  %ymm16, %ymm17, %ymm16
+  %0 = tail call <8 x float> asm "vpandd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_6(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vpandnd %ymm16, %ymm17, %ymm16
+  %0 = tail call <8 x float> asm "vpandnd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_7(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vpminud %ymm16, %ymm17, %ymm16
+  %0 = tail call <8 x float> asm "vpminud $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_8(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vpmaxsd %ymm16, %ymm17, %ymm16
+  %0 = tail call <8 x float> asm "vpmaxsd $1, $2, $0", "=v,v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1, <8 x float> %_ymm0)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_9(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vmovups %ymm16, %ymm16
+  %0 = tail call <8 x float> asm "vmovups $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+
+define <8 x float> @testYMM_10(<8 x float> %_ymm0, <8 x float> %_ymm1) {
+entry:
+; CHECK: vmovupd %ymm16, %ymm16
+  %0 = tail call <8 x float> asm "vmovupd $1, $0", "=v,v,~{ymm0},~{ymm1},~{ymm2},~{ymm3},~{ymm4},~{ymm5},~{ymm6},~{ymm7},~{ymm8},~{ymm9},~{ymm10},~{ymm11},~{ymm12},~{ymm13},~{ymm14},~{ymm15},~{dirflag},~{fpsr},~{flags}"(<8 x float> %_ymm1)
+  ret <8 x float> %0
+}
+