[clang] eaabaf7 - Revert "[MS] Overhaul how clang passes overaligned args on x86_32"
Hans Wennborg via cfe-commits
cfe-commits at lists.llvm.org
Tue Jan 28 13:25:34 PST 2020
Author: Hans Wennborg
Date: 2020-01-28T22:25:07+01:00
New Revision: eaabaf7e04fe98990a8177a3e053346395efde1c
URL: https://github.com/llvm/llvm-project/commit/eaabaf7e04fe98990a8177a3e053346395efde1c
DIFF: https://github.com/llvm/llvm-project/commit/eaabaf7e04fe98990a8177a3e053346395efde1c.diff
LOG: Revert "[MS] Overhaul how clang passes overaligned args on x86_32"
It broke some Chromium tests, so let's revert until it can be fixed; see
https://crbug.com/1046362
This reverts commit 2af74e27ed7d0832cbdde9cb969aaca7a42e99f9.
Added:
Modified:
clang/include/clang/CodeGen/CGFunctionInfo.h
clang/lib/CodeGen/CGCall.cpp
clang/lib/CodeGen/TargetInfo.cpp
clang/test/CodeGen/x86_32-arguments-win32.c
Removed:
clang/test/CodeGenCXX/inalloca-overaligned.cpp
clang/test/CodeGenCXX/inalloca-vector.cpp
################################################################################
diff --git a/clang/include/clang/CodeGen/CGFunctionInfo.h b/clang/include/clang/CodeGen/CGFunctionInfo.h
index 588c96afe402..2a41ab9eece7 100644
--- a/clang/include/clang/CodeGen/CGFunctionInfo.h
+++ b/clang/include/clang/CodeGen/CGFunctionInfo.h
@@ -88,7 +88,6 @@ class ABIArgInfo {
Kind TheKind;
bool PaddingInReg : 1;
bool InAllocaSRet : 1; // isInAlloca()
- bool InAllocaIndirect : 1;// isInAlloca()
bool IndirectByVal : 1; // isIndirect()
bool IndirectRealign : 1; // isIndirect()
bool SRetAfterThis : 1; // isIndirect()
@@ -111,8 +110,8 @@ class ABIArgInfo {
public:
ABIArgInfo(Kind K = Direct)
- : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0), TheKind(K),
- PaddingInReg(false), InAllocaSRet(false), InAllocaIndirect(false),
+ : TypeData(nullptr), PaddingType(nullptr), DirectOffset(0),
+ TheKind(K), PaddingInReg(false), InAllocaSRet(false),
IndirectByVal(false), IndirectRealign(false), SRetAfterThis(false),
InReg(false), CanBeFlattened(false), SignExt(false) {}
@@ -186,10 +185,9 @@ class ABIArgInfo {
AI.setInReg(true);
return AI;
}
- static ABIArgInfo getInAlloca(unsigned FieldIndex, bool Indirect = false) {
+ static ABIArgInfo getInAlloca(unsigned FieldIndex) {
auto AI = ABIArgInfo(InAlloca);
AI.setInAllocaFieldIndex(FieldIndex);
- AI.setInAllocaIndirect(Indirect);
return AI;
}
static ABIArgInfo getExpand() {
@@ -382,15 +380,6 @@ class ABIArgInfo {
AllocaFieldIndex = FieldIndex;
}
- unsigned getInAllocaIndirect() const {
- assert(isInAlloca() && "Invalid kind!");
- return InAllocaIndirect;
- }
- void setInAllocaIndirect(bool Indirect) {
- assert(isInAlloca() && "Invalid kind!");
- InAllocaIndirect = Indirect;
- }
-
/// Return true if this field of an inalloca struct should be returned
/// to implement a struct return calling convention.
bool getInAllocaSRet() const {
diff --git a/clang/lib/CodeGen/CGCall.cpp b/clang/lib/CodeGen/CGCall.cpp
index 5b03f37f4498..3a50e2b103f6 100644
--- a/clang/lib/CodeGen/CGCall.cpp
+++ b/clang/lib/CodeGen/CGCall.cpp
@@ -2339,9 +2339,6 @@ void CodeGenFunction::EmitFunctionProlog(const CGFunctionInfo &FI,
auto FieldIndex = ArgI.getInAllocaFieldIndex();
Address V =
Builder.CreateStructGEP(ArgStruct, FieldIndex, Arg->getName());
- if (ArgI.getInAllocaIndirect())
- V = Address(Builder.CreateLoad(V),
- getContext().getTypeAlignInChars(Ty));
ArgVals.push_back(ParamValue::forIndirect(V));
break;
}
@@ -4041,39 +4038,18 @@ RValue CodeGenFunction::EmitCall(const CGFunctionInfo &CallInfo,
assert(NumIRArgs == 0);
assert(getTarget().getTriple().getArch() == llvm::Triple::x86);
if (I->isAggregate()) {
+ // Replace the placeholder with the appropriate argument slot GEP.
Address Addr = I->hasLValue()
? I->getKnownLValue().getAddress(*this)
: I->getKnownRValue().getAggregateAddress();
llvm::Instruction *Placeholder =
cast<llvm::Instruction>(Addr.getPointer());
-
- if (!ArgInfo.getInAllocaIndirect()) {
- // Replace the placeholder with the appropriate argument slot GEP.
- CGBuilderTy::InsertPoint IP = Builder.saveIP();
- Builder.SetInsertPoint(Placeholder);
- Addr = Builder.CreateStructGEP(ArgMemory,
- ArgInfo.getInAllocaFieldIndex());
- Builder.restoreIP(IP);
- } else {
- // For indirect things such as overaligned structs, replace the
- // placeholder with a regular aggregate temporary alloca. Store the
- // address of this alloca into the struct.
- Addr = CreateMemTemp(info_it->type, "inalloca.indirect.tmp");
- Address ArgSlot = Builder.CreateStructGEP(
- ArgMemory, ArgInfo.getInAllocaFieldIndex());
- Builder.CreateStore(Addr.getPointer(), ArgSlot);
- }
- deferPlaceholderReplacement(Placeholder, Addr.getPointer());
- } else if (ArgInfo.getInAllocaIndirect()) {
- // Make a temporary alloca and store the address of it into the argument
- // struct.
- Address Addr = CreateMemTempWithoutCast(
- I->Ty, getContext().getTypeAlignInChars(I->Ty),
- "indirect-arg-temp");
- I->copyInto(*this, Addr);
- Address ArgSlot =
+ CGBuilderTy::InsertPoint IP = Builder.saveIP();
+ Builder.SetInsertPoint(Placeholder);
+ Addr =
Builder.CreateStructGEP(ArgMemory, ArgInfo.getInAllocaFieldIndex());
- Builder.CreateStore(Addr.getPointer(), ArgSlot);
+ Builder.restoreIP(IP);
+ deferPlaceholderReplacement(Placeholder, Addr.getPointer());
} else {
// Store the RValue into the argument struct.
Address Addr =
diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp
index 08ef8ff64976..c803785435ff 100644
--- a/clang/lib/CodeGen/TargetInfo.cpp
+++ b/clang/lib/CodeGen/TargetInfo.cpp
@@ -1676,7 +1676,6 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
bool IsVectorCall = State.CC == llvm::CallingConv::X86_VectorCall;
Ty = useFirstFieldIfTransparentUnion(Ty);
- TypeInfo TI = getContext().getTypeInfo(Ty);
// Check with the C++ ABI first.
const RecordType *RT = Ty->getAs<RecordType>();
@@ -1726,7 +1725,7 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
bool NeedsPadding = false;
bool InReg;
if (shouldAggregateUseDirect(Ty, State, InReg, NeedsPadding)) {
- unsigned SizeInRegs = (TI.Width + 31) / 32;
+ unsigned SizeInRegs = (getContext().getTypeSize(Ty) + 31) / 32;
SmallVector<llvm::Type*, 3> Elements(SizeInRegs, Int32);
llvm::Type *Result = llvm::StructType::get(LLVMContext, Elements);
if (InReg)
@@ -1736,19 +1735,14 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
llvm::IntegerType *PaddingType = NeedsPadding ? Int32 : nullptr;
- // Pass over-aligned aggregates on Windows indirectly. This behavior was
- // added in MSVC 2015.
- if (IsWin32StructABI && TI.AlignIsRequired && TI.Align > 32)
- return getIndirectResult(Ty, /*ByVal=*/false, State);
-
// Expand small (<= 128-bit) record types when we know that the stack layout
// of those arguments will match the struct. This is important because the
// LLVM backend isn't smart enough to remove byval, which inhibits many
// optimizations.
// Don't do this for the MCU if there are still free integer registers
// (see X86_64 ABI for full explanation).
- if (TI.Width <= 4 * 32 && (!IsMCUABI || State.FreeRegs == 0) &&
- canExpandIndirectArgument(Ty))
+ if (getContext().getTypeSize(Ty) <= 4 * 32 &&
+ (!IsMCUABI || State.FreeRegs == 0) && canExpandIndirectArgument(Ty))
return ABIArgInfo::getExpandWithPadding(
IsFastCall || IsVectorCall || IsRegCall, PaddingType);
@@ -1756,24 +1750,14 @@ ABIArgInfo X86_32ABIInfo::classifyArgumentType(QualType Ty,
}
if (const VectorType *VT = Ty->getAs<VectorType>()) {
- // On Windows, vectors are passed directly if registers are available, or
- // indirectly if not. This avoids the need to align argument memory. Pass
- // user-defined vector types larger than 512 bits indirectly for simplicity.
- if (IsWin32StructABI) {
- if (TI.Width <= 512 && State.FreeSSERegs > 0) {
- --State.FreeSSERegs;
- return ABIArgInfo::getDirectInReg();
- }
- return getIndirectResult(Ty, /*ByVal=*/false, State);
- }
-
// On Darwin, some vectors are passed in memory, we handle this by passing
// it as an i8/i16/i32/i64.
if (IsDarwinVectorABI) {
- if ((TI.Width == 8 || TI.Width == 16 || TI.Width == 32) ||
- (TI.Width == 64 && VT->getNumElements() == 1))
- return ABIArgInfo::getDirect(
- llvm::IntegerType::get(getVMContext(), TI.Width));
+ uint64_t Size = getContext().getTypeSize(Ty);
+ if ((Size == 8 || Size == 16 || Size == 32) ||
+ (Size == 64 && VT->getNumElements() == 1))
+ return ABIArgInfo::getDirect(llvm::IntegerType::get(getVMContext(),
+ Size));
}
if (IsX86_MMXType(CGT.ConvertType(Ty)))
@@ -1803,10 +1787,9 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
CCState State(FI);
if (IsMCUABI)
State.FreeRegs = 3;
- else if (State.CC == llvm::CallingConv::X86_FastCall) {
+ else if (State.CC == llvm::CallingConv::X86_FastCall)
State.FreeRegs = 2;
- State.FreeSSERegs = 3;
- } else if (State.CC == llvm::CallingConv::X86_VectorCall) {
+ else if (State.CC == llvm::CallingConv::X86_VectorCall) {
State.FreeRegs = 2;
State.FreeSSERegs = 6;
} else if (FI.getHasRegParm())
@@ -1814,11 +1797,6 @@ void X86_32ABIInfo::computeInfo(CGFunctionInfo &FI) const {
else if (State.CC == llvm::CallingConv::X86_RegCall) {
State.FreeRegs = 5;
State.FreeSSERegs = 8;
- } else if (IsWin32StructABI) {
- // Since MSVC 2015, the first three SSE vectors have been passed in
- // registers. The rest are passed indirectly.
- State.FreeRegs = DefaultNumRegisterParameters;
- State.FreeSSERegs = 3;
} else
State.FreeRegs = DefaultNumRegisterParameters;
@@ -1865,25 +1843,16 @@ X86_32ABIInfo::addFieldToArgStruct(SmallVector<llvm::Type *, 6> &FrameFields,
CharUnits &StackOffset, ABIArgInfo &Info,
QualType Type) const {
// Arguments are always 4-byte-aligned.
- CharUnits WordSize = CharUnits::fromQuantity(4);
- assert(StackOffset.isMultipleOf(WordSize) && "unaligned inalloca struct");
+ CharUnits FieldAlign = CharUnits::fromQuantity(4);
- // sret pointers and indirect things will require an extra pointer
- // indirection, unless they are byval. Most things are byval, and will not
- // require this indirection.
- bool IsIndirect = false;
- if (Info.isIndirect() && !Info.getIndirectByVal())
- IsIndirect = true;
- Info = ABIArgInfo::getInAlloca(FrameFields.size(), IsIndirect);
- llvm::Type *LLTy = CGT.ConvertTypeForMem(Type);
- if (IsIndirect)
- LLTy = LLTy->getPointerTo(0);
- FrameFields.push_back(LLTy);
- StackOffset += IsIndirect ? WordSize : getContext().getTypeSizeInChars(Type);
+ assert(StackOffset.isMultipleOf(FieldAlign) && "unaligned inalloca struct");
+ Info = ABIArgInfo::getInAlloca(FrameFields.size());
+ FrameFields.push_back(CGT.ConvertTypeForMem(Type));
+ StackOffset += getContext().getTypeSizeInChars(Type);
// Insert padding bytes to respect alignment.
CharUnits FieldEnd = StackOffset;
- StackOffset = FieldEnd.alignTo(WordSize);
+ StackOffset = FieldEnd.alignTo(FieldAlign);
if (StackOffset != FieldEnd) {
CharUnits NumBytes = StackOffset - FieldEnd;
llvm::Type *Ty = llvm::Type::getInt8Ty(getVMContext());
@@ -1897,12 +1866,16 @@ static bool isArgInAlloca(const ABIArgInfo &Info) {
switch (Info.getKind()) {
case ABIArgInfo::InAlloca:
return true;
+ case ABIArgInfo::Indirect:
+ assert(Info.getIndirectByVal());
+ return true;
case ABIArgInfo::Ignore:
return false;
- case ABIArgInfo::Indirect:
case ABIArgInfo::Direct:
case ABIArgInfo::Extend:
- return !Info.getInReg();
+ if (Info.getInReg())
+ return false;
+ return true;
case ABIArgInfo::Expand:
case ABIArgInfo::CoerceAndExpand:
// These are aggregate types which are never passed in registers when
@@ -1936,7 +1909,8 @@ void X86_32ABIInfo::rewriteWithInAlloca(CGFunctionInfo &FI) const {
// Put the sret parameter into the inalloca struct if it's in memory.
if (Ret.isIndirect() && !Ret.getInReg()) {
- addFieldToArgStruct(FrameFields, StackOffset, Ret, FI.getReturnType());
+ CanQualType PtrTy = getContext().getPointerType(FI.getReturnType());
+ addFieldToArgStruct(FrameFields, StackOffset, Ret, PtrTy);
// On Windows, the hidden sret parameter is always returned in eax.
Ret.setInAllocaSRet(IsWin32StructABI);
}
diff --git a/clang/test/CodeGen/x86_32-arguments-win32.c b/clang/test/CodeGen/x86_32-arguments-win32.c
index 33a6216a62bd..65e25f32c250 100644
--- a/clang/test/CodeGen/x86_32-arguments-win32.c
+++ b/clang/test/CodeGen/x86_32-arguments-win32.c
@@ -46,47 +46,3 @@ struct s6 {
struct s6 f6_1(void) { while (1) {} }
void f6_2(struct s6 a0) {}
-
-// MSVC passes up to three vectors in registers, and the rest indirectly. We
-// (arbitrarily) pass oversized vectors indirectly, since that is the safest way
-// to do it.
-typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
-typedef float __m256 __attribute__((__vector_size__(32), __aligned__(32)));
-typedef float __m512 __attribute__((__vector_size__(64), __aligned__(64)));
-typedef float __m1024 __attribute__((__vector_size__(128), __aligned__(128)));
-
-__m128 gv128;
-__m256 gv256;
-__m512 gv512;
-__m1024 gv1024;
-
-void receive_vec_128(__m128 x, __m128 y, __m128 z, __m128 w, __m128 q) {
- gv128 = x + y + z + w + q;
-}
-void receive_vec_256(__m256 x, __m256 y, __m256 z, __m256 w, __m256 q) {
- gv256 = x + y + z + w + q;
-}
-void receive_vec_512(__m512 x, __m512 y, __m512 z, __m512 w, __m512 q) {
- gv512 = x + y + z + w + q;
-}
-void receive_vec_1024(__m1024 x, __m1024 y, __m1024 z, __m1024 w, __m1024 q) {
- gv1024 = x + y + z + w + q;
-}
-// CHECK-LABEL: define dso_local void @receive_vec_128(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* %0, <4 x float>* %1)
-// CHECK-LABEL: define dso_local void @receive_vec_256(<8 x float> inreg %x, <8 x float> inreg %y, <8 x float> inreg %z, <8 x float>* %0, <8 x float>* %1)
-// CHECK-LABEL: define dso_local void @receive_vec_512(<16 x float> inreg %x, <16 x float> inreg %y, <16 x float> inreg %z, <16 x float>* %0, <16 x float>* %1)
-// CHECK-LABEL: define dso_local void @receive_vec_1024(<32 x float>* %0, <32 x float>* %1, <32 x float>* %2, <32 x float>* %3, <32 x float>* %4)
-
-void pass_vec_128() {
- __m128 z = {0};
- receive_vec_128(z, z, z, z, z);
-}
-
-// CHECK-LABEL: define dso_local void @pass_vec_128()
-// CHECK: call void @receive_vec_128(<4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float> inreg %{{[^,)]*}}, <4 x float>* %{{[^,)]*}}, <4 x float>* %{{[^,)]*}})
-
-
-void __fastcall fastcall_indirect_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q) {
- gv128 = x + y + z + w + q;
-}
-// CHECK-LABEL: define dso_local x86_fastcallcc void @"\01 at fastcall_indirect_vec@84"(<4 x float> inreg %x, <4 x float> inreg %y, <4 x float> inreg %z, <4 x float>* inreg %0, i32 inreg %edx, <4 x float>* %1)
diff --git a/clang/test/CodeGenCXX/inalloca-overaligned.cpp b/clang/test/CodeGenCXX/inalloca-overaligned.cpp
deleted file mode 100644
index 910f0d92895e..000000000000
--- a/clang/test/CodeGenCXX/inalloca-overaligned.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-// RUN: %clang_cc1 -fms-extensions -w -triple i386-pc-win32 -emit-llvm -o - %s | FileCheck %s
-
-// PR44395
-// MSVC passes overaligned types indirectly since MSVC 2015. Make sure that
-// works with inalloca.
-
-// FIXME: Pass non-trivial *and* overaligned types indirectly. Right now the C++
-// ABI rules say to use inalloca, and they take precedence, so it's not easy to
-// implement this.
-
-
-struct NonTrivial {
- NonTrivial();
- NonTrivial(const NonTrivial &o);
- int x;
-};
-
-struct __declspec(align(64)) OverAligned {
- OverAligned();
- int buf[16];
-};
-
-extern int gvi32;
-
-int receive_inalloca_overaligned(NonTrivial nt, OverAligned o) {
- return nt.x + o.buf[0];
-}
-
-// CHECK-LABEL: define dso_local i32 @"?receive_inalloca_overaligned@@Y{{.*}}"
-// CHECK-SAME: (<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %0)
-
-int pass_inalloca_overaligned() {
- gvi32 = receive_inalloca_overaligned(NonTrivial(), OverAligned());
- return gvi32;
-}
-
-// CHECK-LABEL: define dso_local i32 @"?pass_inalloca_overaligned@@Y{{.*}}"
-// CHECK: [[TMP:%[^ ]*]] = alloca %struct.OverAligned, align 64
-// CHECK: call i8* @llvm.stacksave()
-// CHECK: alloca inalloca <{ %struct.NonTrivial, %struct.OverAligned* }>
-
-// Construct OverAligned into TMP.
-// CHECK: call x86_thiscallcc %struct.OverAligned* @"??0OverAligned@@QAE at XZ"(%struct.OverAligned* [[TMP]])
-
-// Construct NonTrivial into the GEP.
-// CHECK: [[GEP:%[^ ]*]] = getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 0
-// CHECK: call x86_thiscallcc %struct.NonTrivial* @"??0NonTrivial@@QAE at XZ"(%struct.NonTrivial* [[GEP]])
-
-// Store the address of an OverAligned temporary into the struct.
-// CHECK: getelementptr inbounds <{ %struct.NonTrivial, %struct.OverAligned* }>, <{ %struct.NonTrivial, %struct.OverAligned* }>* %{{.*}}, i32 0, i32 1
-// CHECK: store %struct.OverAligned* [[TMP]], %struct.OverAligned** %{{.*}}, align 4
-// CHECK: call i32 @"?receive_inalloca_overaligned@@Y{{.*}}"(<{ %struct.NonTrivial, %struct.OverAligned* }>* inalloca %argmem)
diff --git a/clang/test/CodeGenCXX/inalloca-vector.cpp b/clang/test/CodeGenCXX/inalloca-vector.cpp
deleted file mode 100644
index f3d7f81e9443..000000000000
--- a/clang/test/CodeGenCXX/inalloca-vector.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-// RUN: %clang_cc1 -w -triple i686-pc-win32 -emit-llvm -o - %s | FileCheck %s
-
-// PR44395
-// MSVC passes up to three vectors in registers, and the rest indirectly. Check
-// that both are compatible with an inalloca prototype.
-
-struct NonTrivial {
- NonTrivial();
- NonTrivial(const NonTrivial &o);
- unsigned handle;
-};
-
-typedef float __m128 __attribute__((__vector_size__(16), __aligned__(16)));
-__m128 gv128;
-
-// nt, w, and q will be in the inalloca pack.
-void receive_vec_128(NonTrivial nt, __m128 x, __m128 y, __m128 z, __m128 w, __m128 q) {
- gv128 = x + y + z + w + q;
-}
-// CHECK-LABEL: define dso_local void @"?receive_vec_128@@YAXUNonTrivial@@T__m128@@1111 at Z"
-// CHECK-SAME: (<4 x float> inreg %x,
-// CHECK-SAME: <4 x float> inreg %y,
-// CHECK-SAME: <4 x float> inreg %z,
-// CHECK-SAME: <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* inalloca %0)
-
-void pass_vec_128() {
- __m128 z = {0};
- receive_vec_128(NonTrivial(), z, z, z, z, z);
-}
-// CHECK-LABEL: define dso_local void @"?pass_vec_128@@YAXXZ"()
-// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 0
-// CHECK: call x86_thiscallcc %struct.NonTrivial* @"??0NonTrivial@@QAE at XZ"(%struct.NonTrivial* %{{.*}})
-
-// Store q, store temp alloca.
-// CHECK: store <4 x float> %{{[^,]*}}, <4 x float>* %{{[^,]*}}, align 16
-// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 1
-// CHECK: store <4 x float>* %{{[^,]*}}, <4 x float>** %{{[^,]*}}, align 4
-
-// Store w, store temp alloca.
-// CHECK: store <4 x float> %{{[^,]*}}, <4 x float>* %{{[^,]*}}, align 16
-// CHECK: getelementptr inbounds <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>, <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* %{{[^,]*}}, i32 0, i32 2
-// CHECK: store <4 x float>* %{{[^,]*}}, <4 x float>** %{{[^,]*}}, align 4
-
-// CHECK: call void @"?receive_vec_128@@YAXUNonTrivial@@T__m128@@1111 at Z"
-// CHECK-SAME: (<4 x float> inreg %{{[^,]*}},
-// CHECK-SAME: <4 x float> inreg %{{[^,]*}},
-// CHECK-SAME: <4 x float> inreg %{{[^,]*}},
-// CHECK-SAME: <{ %struct.NonTrivial, <4 x float>*, <4 x float>* }>* inalloca %{{[^,]*}})
-
-// w will be passed indirectly by register, and q will be passed indirectly, but
-// the pointer will be in memory.
-void __fastcall fastcall_receive_vec(__m128 x, __m128 y, __m128 z, __m128 w, int edx, __m128 q, NonTrivial nt) {
- gv128 = x + y + z + w + q;
-}
-// CHECK-LABEL: define dso_local x86_fastcallcc void @"?fastcall_receive_vec@@Y{{[^"]*}}"
-// CHECK-SAME: (<4 x float> inreg %x,
-// CHECK-SAME: <4 x float> inreg %y,
-// CHECK-SAME: <4 x float> inreg %z,
-// CHECK-SAME: <4 x float>* inreg %0,
-// CHECK-SAME: i32 inreg %edx,
-// CHECK-SAME: <{ <4 x float>*, %struct.NonTrivial }>* inalloca %1)
-
-
-void __vectorcall vectorcall_receive_vec(double xmm0, double xmm1, double xmm2,
- __m128 x, __m128 y, __m128 z,
- __m128 w, int edx, __m128 q, NonTrivial nt) {
- gv128 = x + y + z + w + q;
-}
-// FIXME: Enable these checks, clang generates wrong IR.
-// CHECK-LABEL: define dso_local x86_vectorcallcc void @"?vectorcall_receive_vec@@Y{{[^"]*}}"
-// CHECKX-SAME: (double inreg %xmm0,
-// CHECKX-SAME: double inreg %xmm1,
-// CHECKX-SAME: double inreg %xmm2,
-// CHECKX-SAME: <4 x float> inreg %x,
-// CHECKX-SAME: <4 x float> inreg %y,
-// CHECKX-SAME: <4 x float> inreg %z,
-// CHECKX-SAME: <4 x float>* inreg %0,
-// CHECKX-SAME: i32 inreg %edx,
-// CHECKX-SAME: <{ <4 x float>*, %struct.NonTrivial }>* inalloca %1)
More information about the cfe-commits
mailing list