[clang] 0edb212 - [MS] Mark vectorcall FP and vector args inreg

Reid Kleckner via cfe-commits <cfe-commits at lists.llvm.org>
Wed Feb 19 16:38:47 PST 2020


Author: Reid Kleckner
Date: 2020-02-19T16:37:50-08:00
New Revision: 0edb2129258c727e9efe6fa234b28880ff64c1b9

URL: https://github.com/llvm/llvm-project/commit/0edb2129258c727e9efe6fa234b28880ff64c1b9
DIFF: https://github.com/llvm/llvm-project/commit/0edb2129258c727e9efe6fa234b28880ff64c1b9.diff

LOG: [MS] Mark vectorcall FP and vector args inreg

This has no effect on how LLVM passes the arguments, but it prevents
rewriteWithInAlloca from thinking that these parameters should be part
of the inalloca pack.
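
For illustration, here is a minimal sketch of the situation the change
targets (the type and function names are hypothetical; the shape follows
the inalloca-vector.cpp test updated below). On 32-bit Windows the
non-trivially-copyable argument forces an inalloca argument pack, while
the vectorcall first pass preassigns the FP and vector arguments to XMM
registers; the inreg marker is what keeps them out of that pack.

  // Hypothetical example, not part of the patch. Compile with something
  // like: clang -cc1 -triple i386-pc-windows-msvc -emit-llvm example.cpp
  typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));

  struct NonTrivial {
    NonTrivial() : x(0) {}
    NonTrivial(const NonTrivial &o) : x(o.x) {}  // non-trivial copy => inalloca
    int x;
  };

  // 'xmm0' and 'x' are preassigned to SSE registers by the vectorcall first
  // pass; with this change they are emitted as 'inreg' parameters, so only
  // 'nt' (and any arguments that do not fit in registers) land in the
  // inalloca pack.
  void __vectorcall take_regs_and_pack(double xmm0, __m128 x, int edx,
                                       NonTrivial nt) {
    (void)xmm0; (void)x; (void)edx; (void)nt;
  }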

Follow-up to D72114

Reviewed By: erichkeane

Differential Revision: https://reviews.llvm.org/D74452

Added: 
    

Modified: 
    clang/lib/CodeGen/TargetInfo.cpp
    clang/test/CodeGen/vectorcall.c
    clang/test/CodeGenCXX/inalloca-vector.cpp

Removed: 
    


################################################################################
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index e40f24d0ca4d..21d5cd08c9fa 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1687,7 +1687,7 @@ void X86_32ABIInfo::runVectorCallFirstPass(CGFunctionInfo &FI, CCState &State) c
         isHomogeneousAggregate(Ty, Base, NumElts)) {
       if (State.FreeSSERegs >= NumElts) {
         State.FreeSSERegs -= NumElts;
-        Args[I].info = ABIArgInfo::getDirect();
+        Args[I].info = ABIArgInfo::getDirectInReg();
         State.IsPreassigned.set(I);
       }
     }
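
The inalloca rewrite keys off the InReg bit on ABIArgInfo. A rough sketch
of the rule the log message describes, assuming the general shape of the
ABIArgInfo API (the helper name and exact case handling here are
illustrative, not the verbatim clang code):

  #include "clang/CodeGen/CGFunctionInfo.h"
  using namespace clang;
  using namespace clang::CodeGen;

  // Sketch: an argument stays out of the inalloca pack if it is ignored or
  // carries the InReg flag. With getDirectInReg(), the SSE arguments
  // preassigned by runVectorCallFirstPass now report InReg and are left
  // alone when the remaining arguments are rewritten to use inalloca.
  static bool belongsInInAllocaPack(const ABIArgInfo &Info) {
    switch (Info.getKind()) {
    case ABIArgInfo::InAlloca:
      return true;                 // already part of the pack
    case ABIArgInfo::Ignore:
      return false;                // nothing is passed at all
    case ABIArgInfo::Direct:
    case ABIArgInfo::Extend:
    case ABIArgInfo::Indirect:
      return !Info.getInReg();     // InReg arguments stay in registers
    default:
      return true;                 // everything else goes to memory
    }
  }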

diff --git a/clang/test/CodeGen/vectorcall.c b/clang/test/CodeGen/vectorcall.c
index aa529d087011..77db600a7f89 100644
--- a/clang/test/CodeGen/vectorcall.c
+++ b/clang/test/CodeGen/vectorcall.c
@@ -31,13 +31,13 @@ void __vectorcall hfa1(int a, struct HFA4 b, int c) {}
 // indirectly. Additional vector arguments can consume the rest of the SSE
 // registers.
 void __vectorcall hfa2(struct HFA4 a, struct HFA4 b, double c) {}
-// X32: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double %c)
+// X32: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* inreg %b, double inreg %c)
 // X64: define dso_local x86_vectorcallcc void @"\01hfa2@@72"(%struct.HFA4 inreg %a.coerce, %struct.HFA4* %b, double %c)
 
 // Ensure that we pass builtin types directly while counting them against the
 // SSE register usage.
 void __vectorcall hfa3(double a, double b, double c, double d, double e, struct HFA2 f) {}
-// X32: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* inreg %f)
+// X32: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double inreg %a, double inreg %b, double inreg %c, double inreg %d, double inreg %e, %struct.HFA2* inreg %f)
 // X64: define dso_local x86_vectorcallcc void @"\01hfa3@@56"(double %a, double %b, double %c, double %d, double %e, %struct.HFA2* %f)
 
 // Aggregates with more than four elements are not HFAs and are passed byval.
@@ -64,21 +64,21 @@ v4f32 __vectorcall hva1(int a, struct HVA4 b, int c) {return b.w;}
 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva1@@80"(i32 %a, %struct.HVA4 inreg %b.coerce, i32 %c)
 
 v4f32 __vectorcall hva2(struct HVA4 a, struct HVA4 b, v4f32 c) {return c;}
-// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> %c)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* inreg %b, <4 x float> inreg %c)
 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva2@@144"(%struct.HVA4 inreg %a.coerce, %struct.HVA4* %b, <4 x float> %c)
 
 v4f32 __vectorcall hva3(v4f32 a, v4f32 b, v4f32 c, v4f32 d, v4f32 e, struct HVA2 f) {return f.x;}
-// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* inreg %f)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> inreg %a, <4 x float> inreg %b, <4 x float> inreg %c, <4 x float> inreg %d, <4 x float> inreg %e, %struct.HVA2* inreg %f)
 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva3@@112"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, <4 x float> %e, %struct.HVA2* %f)
 
 // vector types have higher priority than HVA structures, so vector types are allocated first
 // and HVAs are allocated if enough registers are available
 v4f32 __vectorcall hva4(struct HVA4 a, struct HVA2 b, v4f32 c) {return b.y;}
-// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> %c)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* inreg %b, <4 x float> inreg %c)
 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva4@@112"(%struct.HVA4 inreg %a.coerce, %struct.HVA2* %b, <4 x float> %c)
 
 v4f32 __vectorcall hva5(struct HVA3 a, struct HVA3 b, v4f32 c, struct HVA2 d) {return d.y;}
-// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* inreg %b, <4 x float> inreg %c, %struct.HVA2 inreg %d.coerce)
 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva5@@144"(%struct.HVA3 inreg %a.coerce, %struct.HVA3* %b, <4 x float> %c, %struct.HVA2 inreg %d.coerce)
 
 struct HVA4 __vectorcall hva6(struct HVA4 a, struct HVA4 b) { return b;}
@@ -90,7 +90,7 @@ struct HVA5 __vectorcall hva7() {struct HVA5 a = {}; return a;}
 // X64: define dso_local x86_vectorcallcc void @"\01hva7@@0"(%struct.HVA5* noalias sret %agg.result)
 
 v4f32 __vectorcall hva8(v4f32 a, v4f32 b, v4f32 c, v4f32 d, int e, v4f32 f) {return f;}
-// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 inreg %e, <4 x float> %f)
+// X32: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@84"(<4 x float> inreg %a, <4 x float> inreg %b, <4 x float> inreg %c, <4 x float> inreg %d, i32 inreg %e, <4 x float> inreg %f)
 // X64: define dso_local x86_vectorcallcc <4 x float> @"\01hva8@@88"(<4 x float> %a, <4 x float> %b, <4 x float> %c, <4 x float> %d, i32 %e, <4 x float> %f)
 
 typedef float __attribute__((ext_vector_type(3))) v3f32;
@@ -104,7 +104,7 @@ void __vectorcall odd_size_hva(struct OddSizeHVA a) {}
 // consider 'p7' as a register.  Instead p5 gets put into the register on the second pass.
 // x86 should pass p2, p6 and p7 in registers, then p1 in the second pass.
 struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3, int p4, struct HFA2 p5, float p6, float p7, int p8){ return p1;}
-// X32: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2* %p5, float %p6, float %p7, i32 %p8)
+// X32: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@84"(%struct.HFA2 inreg %p1.coerce, float inreg %p2, %struct.HFA4* inreg %p3, i32 inreg %p4, %struct.HFA2* %p5, float inreg %p6, float inreg %p7, i32 %p8)
 // X64: define dso_local x86_vectorcallcc %struct.HFA2 @"\01AddParticles@@104"(%struct.HFA2 inreg %p1.coerce, float %p2, %struct.HFA4* %p3, i32 %p4, %struct.HFA2 inreg %p5.coerce, float %p6, float %p7, i32 %p8)
 
 // Vectorcall in both architectures allows passing of an HVA as long as there is room,
@@ -113,7 +113,7 @@ struct HFA2 __vectorcall AddParticles(struct HFA2 p1, float p2, struct HFA4 p3,
 // in a register, does NOT put p7 in a register (since there's no room), then puts
 // p8 in a register.
 void __vectorcall HVAAnywhere(struct HFA2 p1, int p2, int p3, float p4, int p5, int p6, struct HFA4 p7, struct HFA2 p8, float p9){}
-// X32: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9)
+// X32: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@88"(%struct.HFA2 inreg %p1.coerce, i32 inreg %p2, i32 inreg %p3, float inreg %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float inreg %p9)
 // X64: define dso_local x86_vectorcallcc void @"\01HVAAnywhere@@112"(%struct.HFA2 inreg %p1.coerce, i32 %p2, i32 %p3, float %p4, i32 %p5, i32 %p6, %struct.HFA4* %p7, %struct.HFA2 inreg %p8.coerce, float %p9) 
 
 #ifndef __x86_64__
@@ -127,12 +127,12 @@ void __vectorcall vectorcall_indirect_vec(
 }
 
 // X32: define dso_local x86_vectorcallcc void @"\01vectorcall_indirect_vec@@{{[0-9]+}}"
-// X32-SAME: (double %xmm0,
-// X32-SAME: double %xmm1,
-// X32-SAME: double %xmm2,
-// X32-SAME: double %xmm3,
-// X32-SAME: double %xmm4,
-// X32-SAME: <4 x float> %xmm5,
+// X32-SAME: (double inreg %xmm0,
+// X32-SAME: double inreg %xmm1,
+// X32-SAME: double inreg %xmm2,
+// X32-SAME: double inreg %xmm3,
+// X32-SAME: double inreg %xmm4,
+// X32-SAME: <4 x float> inreg %xmm5,
 // X32-SAME: <4 x float>* inreg %0,
 // X32-SAME: i32 inreg %edx,
 // X32-SAME: <4 x float>* %1)

diff --git a/clang/test/CodeGenCXX/inalloca-vector.cpp b/clang/test/CodeGenCXX/inalloca-vector.cpp
index f3d7f81e9443..ad04e046d21d 100644
--- a/clang/test/CodeGenCXX/inalloca-vector.cpp
+++ b/clang/test/CodeGenCXX/inalloca-vector.cpp
@@ -66,14 +66,13 @@ void __vectorcall vectorcall_receive_vec(double xmm0, double xmm1, double xmm2,
                                          __m128 w, int edx, __m128 q, NonTrivial nt) {
   gv128 = x + y + z + w + q;
 }
-// FIXME: Enable these checks, clang generates wrong IR.
 // CHECK-LABEL: define dso_local x86_vectorcallcc void @"?vectorcall_receive_vec@@Y{{[^"]*}}"
-// CHECKX-SAME: (double inreg %xmm0,
-// CHECKX-SAME: double inreg %xmm1,
-// CHECKX-SAME: double inreg %xmm2,
-// CHECKX-SAME: <4 x float> inreg %x,
-// CHECKX-SAME: <4 x float> inreg %y,
-// CHECKX-SAME: <4 x float> inreg %z,
-// CHECKX-SAME: <4 x float>* inreg %0,
-// CHECKX-SAME: i32 inreg %edx,
-// CHECKX-SAME: <{ <4 x float>*, %struct.NonTrivial }>* inalloca %1)
+// CHECK-SAME: (double inreg %xmm0,
+// CHECK-SAME: double inreg %xmm1,
+// CHECK-SAME: double inreg %xmm2,
+// CHECK-SAME: <4 x float> inreg %x,
+// CHECK-SAME: <4 x float> inreg %y,
+// CHECK-SAME: <4 x float> inreg %z,
+// CHECK-SAME: <4 x float>* inreg %0,
+// CHECK-SAME: i32 inreg %edx,
+// CHECK-SAME: <{ <4 x float>*, %struct.NonTrivial }>* inalloca %1)

