[llvm] r368594 - [X86] Disable use of zmm registers for varargs musttail calls under prefer-vector-width=256 and min-legal-vector-width=256.

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Aug 12 10:43:26 PDT 2019


Author: ctopper
Date: Mon Aug 12 10:43:26 2019
New Revision: 368594

URL: http://llvm.org/viewvc/llvm-project?rev=368594&view=rev
Log:
[X86] Disable use of zmm registers for varargs musttail calls under prefer-vector-width=256 and min-legal-vector-width=256.

Under this configuration, the v16f32 type we try to use isn't mapped to
a register class, so the getRegClassFor call will fail.
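
For reference, a reproducer distilled from the test added below; this is
a minimal sketch, with illustrative function names rather than anything
taken verbatim from the patch:

    ; Running llc -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512vl on
    ; this would previously fail: forwarding the varargs vector registers
    ; for the musttail call wants v16f32, but under the 256-bit width
    ; attributes v16f32 has no register class.
    define x86_vectorcallcc i32 @thunk(...) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
      %r = musttail call x86_vectorcallcc i32 (...) bitcast (i32 (i32 inreg)* @target to i32 (...)*)(...)
      ret i32 %r
    }

    define x86_vectorcallcc i32 @target(i32 inreg %a) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
      ret i32 %a
    }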

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/test/CodeGen/X86/musttail-fastcall.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=368594&r1=368593&r2=368594&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Mon Aug 12 10:43:26 2019
@@ -3348,7 +3348,7 @@ SDValue X86TargetLowering::LowerFormalAr
     // Find the largest legal vector type.
     MVT VecVT = MVT::Other;
     // FIXME: Only some x86_32 calling conventions support AVX512.
-    if (Subtarget.hasAVX512() &&
+    if (Subtarget.useAVX512Regs() &&
         (Is64Bit || (CallConv == CallingConv::X86_VectorCall ||
                      CallConv == CallingConv::Intel_OCL_BI)))
       VecVT = MVT::v16f32;
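
The gist of the swapped predicate, as a rough sketch rather than the
verbatim X86Subtarget code (the field names here are illustrative):

    // hasAVX512() only tests the AVX512F feature bit.
    bool hasAVX512() const { return HasAVX512; }

    // useAVX512Regs() also honors the prefer-vector-width= and
    // min-legal-vector-width= function attributes. With plain AVX512F
    // there are no 256-bit EVEX instructions to fall back on, so zmm is
    // still used; with AVX512VL and both attributes at 256 it returns
    // false, zmm registers are avoided, and v16f32 never becomes legal.
    bool useAVX512Regs() const {
      return HasAVX512 &&
             (!HasVLX || PreferVectorWidth >= 512 || RequiredVectorWidth > 256);
    }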

Modified: llvm/trunk/test/CodeGen/X86/musttail-fastcall.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/musttail-fastcall.ll?rev=368594&r1=368593&r2=368594&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/musttail-fastcall.ll (original)
+++ llvm/trunk/test/CodeGen/X86/musttail-fastcall.ll Mon Aug 12 10:43:26 2019
@@ -1,6 +1,7 @@
 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2 | FileCheck %s --check-prefix=CHECK --check-prefix=SSE2
 ; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
-; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512f | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512F
+; RUN: llc < %s -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512vl | FileCheck %s --check-prefix=CHECK --check-prefix=AVX512 --check-prefix=AVX512VL
 
 ; While we don't support varargs with fastcall, we do support forwarding.
 
@@ -106,4 +107,91 @@ define x86_vectorcallcc i32 @vector_targ
   %a0 = add i32 %a, %b
   %a1 = add i32 %a0, %c
   ret i32 %a1
+}
+
+; Repeat the vectorcall test with "prefer-vector-width"="256" and
+; "min-legal-vector-width"="256" attributes on the functions.
+
+define i32 @call_vector_thunk_prefer256() "min-legal-vector-width"="256" "prefer-vector-width"="256" {
+  %r = call x86_vectorcallcc i32 (...) @vector_thunk_prefer256(i32 inreg 1, i32 inreg 2, i32 3)
+  ret i32 %r
+}
+
+define x86_vectorcallcc i32 @vector_thunk_prefer256(...) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
+  call void @puts(i8* getelementptr ([4 x i8], [4 x i8]* @asdf, i32 0, i32 0))
+  %r = musttail call x86_vectorcallcc i32 (...) bitcast (i32 (i32, i32, i32)* @vector_target_prefer256 to i32 (...)*) (...)
+  ret i32 %r
+}
+
+; Check that we spill and fill SSE registers around the call to puts.
+
+; CHECK-LABEL: vector_thunk_prefer256@@0:
+; CHECK-DAG: movl %ecx, {{.*}}
+; CHECK-DAG: movl %edx, {{.*}}
+
+; SSE2-DAG: movups %xmm0, {{.*}}
+; SSE2-DAG: movups %xmm1, {{.*}}
+; SSE2-DAG: movups %xmm2, {{.*}}
+; SSE2-DAG: movups %xmm3, {{.*}}
+; SSE2-DAG: movups %xmm4, {{.*}}
+; SSE2-DAG: movups %xmm5, {{.*}}
+
+; AVX-DAG: vmovups %ymm0, {{.*}}
+; AVX-DAG: vmovups %ymm1, {{.*}}
+; AVX-DAG: vmovups %ymm2, {{.*}}
+; AVX-DAG: vmovups %ymm3, {{.*}}
+; AVX-DAG: vmovups %ymm4, {{.*}}
+; AVX-DAG: vmovups %ymm5, {{.*}}
+
+; AVX512F-DAG: vmovups %zmm0, {{.*}}
+; AVX512F-DAG: vmovups %zmm1, {{.*}}
+; AVX512F-DAG: vmovups %zmm2, {{.*}}
+; AVX512F-DAG: vmovups %zmm3, {{.*}}
+; AVX512F-DAG: vmovups %zmm4, {{.*}}
+; AVX512F-DAG: vmovups %zmm5, {{.*}}
+
+; AVX512VL-DAG: vmovups %ymm0, {{.*}}
+; AVX512VL-DAG: vmovups %ymm1, {{.*}}
+; AVX512VL-DAG: vmovups %ymm2, {{.*}}
+; AVX512VL-DAG: vmovups %ymm3, {{.*}}
+; AVX512VL-DAG: vmovups %ymm4, {{.*}}
+; AVX512VL-DAG: vmovups %ymm5, {{.*}}
+
+; CHECK: calll _puts
+
+; SSE2-DAG: movups {{.*}}, %xmm0
+; SSE2-DAG: movups {{.*}}, %xmm1
+; SSE2-DAG: movups {{.*}}, %xmm2
+; SSE2-DAG: movups {{.*}}, %xmm3
+; SSE2-DAG: movups {{.*}}, %xmm4
+; SSE2-DAG: movups {{.*}}, %xmm5
+
+; AVX-DAG: vmovups {{.*}}, %ymm0
+; AVX-DAG: vmovups {{.*}}, %ymm1
+; AVX-DAG: vmovups {{.*}}, %ymm2
+; AVX-DAG: vmovups {{.*}}, %ymm3
+; AVX-DAG: vmovups {{.*}}, %ymm4
+; AVX-DAG: vmovups {{.*}}, %ymm5
+
+; AVX512F-DAG: vmovups {{.*}}, %zmm0
+; AVX512F-DAG: vmovups {{.*}}, %zmm1
+; AVX512F-DAG: vmovups {{.*}}, %zmm2
+; AVX512F-DAG: vmovups {{.*}}, %zmm3
+; AVX512F-DAG: vmovups {{.*}}, %zmm4
+; AVX512F-DAG: vmovups {{.*}}, %zmm5
+
+; AVX512VL-DAG: vmovups {{.*}}, %ymm0
+; AVX512VL-DAG: vmovups {{.*}}, %ymm1
+; AVX512VL-DAG: vmovups {{.*}}, %ymm2
+; AVX512VL-DAG: vmovups {{.*}}, %ymm3
+; AVX512VL-DAG: vmovups {{.*}}, %ymm4
+; AVX512VL-DAG: vmovups {{.*}}, %ymm5
+
+; CHECK-DAG: movl {{.*}}, %ecx
+; CHECK-DAG: movl {{.*}}, %edx
+; CHECK: jmp vector_target_prefer256@@12
+
+define x86_vectorcallcc i32 @vector_target_prefer256(i32 inreg %a, i32 inreg %b, i32 %c) "min-legal-vector-width"="256" "prefer-vector-width"="256" {
+  %a0 = add i32 %a, %b
+  %a1 = add i32 %a0, %c
+  ret i32 %a1
 }
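
To exercise the case the new RUN line covers, the command can also be
run by hand (this mirrors the AVX512VL RUN line above); before this
patch it would fail in getRegClassFor, presumably via an assertion in
an assertions-enabled build, instead of emitting the ymm spills checked
for above:

    llc < llvm/trunk/test/CodeGen/X86/musttail-fastcall.ll \
        -mtriple=i686-pc-win32 -mattr=+sse2,+avx,+avx512vl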

More information about the llvm-commits mailing list