Index: include/clang/Basic/TargetInfo.h =================================================================== --- include/clang/Basic/TargetInfo.h (revision 214123) +++ include/clang/Basic/TargetInfo.h (working copy) @@ -66,6 +66,7 @@ unsigned char LongWidth, LongAlign; unsigned char LongLongWidth, LongLongAlign; unsigned char SuitableAlign; + unsigned char CompleteObjectAlign; unsigned char MinGlobalAlign; unsigned char MaxAtomicPromoteWidth, MaxAtomicInlineWidth; unsigned short MaxVectorAlign; @@ -313,6 +314,14 @@ /// \brief Return the alignment that is suitable for storing any /// object with a fundamental alignment requirement. unsigned getSuitableAlign() const { return SuitableAlign; } + + /// \brief Return the alignment that is suitable for storing any + /// complete object with a fundamental alignment requirement. Ideally, + /// this should be merged with SuitableAlign, which serves the same + /// purpose (SuitableAlign is used for a Sema warning, this value in + /// IRGen). They are currently kept distinct because the implications of + /// the alignment change on non-Darwin targets are unknown. + unsigned getCompleteObjectAlign() const { return CompleteObjectAlign; } /// getMinGlobalAlign - Return the minimum alignment of a global variable, /// unless its alignment is explicitly reduced via attributes. 
Index: lib/Basic/TargetInfo.cpp =================================================================== --- lib/Basic/TargetInfo.cpp (revision 214123) +++ lib/Basic/TargetInfo.cpp (working copy) @@ -36,6 +36,7 @@ LongWidth = LongAlign = 32; LongLongWidth = LongLongAlign = 64; SuitableAlign = 64; + CompleteObjectAlign = 0; MinGlobalAlign = 0; HalfWidth = 16; HalfAlign = 16; Index: lib/Basic/Targets.cpp =================================================================== --- lib/Basic/Targets.cpp (revision 214123) +++ lib/Basic/Targets.cpp (working copy) @@ -3109,6 +3109,7 @@ LongDoubleWidth = 128; LongDoubleAlign = 128; SuitableAlign = 128; + CompleteObjectAlign = SuitableAlign; MaxVectorAlign = 256; SizeType = UnsignedLong; IntPtrType = SignedLong; @@ -3428,6 +3429,7 @@ : DarwinTargetInfo(Triple) { Int64Type = SignedLongLong; MaxVectorAlign = 256; + CompleteObjectAlign = SuitableAlign; // The 64-bit iOS simulator uses the builtin bool type for Objective-C. llvm::Triple T = llvm::Triple(Triple); if (T.getOS() == llvm::Triple::IOS) @@ -3544,6 +3546,9 @@ DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 64; const llvm::Triple &T = getTriple(); + if (T.isOSDarwin()) + CompleteObjectAlign = SuitableAlign; + // size_t is unsigned long on Darwin and NetBSD. if (T.isOSDarwin() || T.getOS() == llvm::Triple::NetBSD) SizeType = UnsignedLong; @@ -3615,6 +3620,8 @@ IsAAPCS = false; DoubleAlign = LongLongAlign = LongDoubleAlign = SuitableAlign = 32; + if (T.isOSDarwin()) + CompleteObjectAlign = SuitableAlign; // size_t is unsigned int on FreeBSD. 
if (T.getOS() == llvm::Triple::FreeBSD) @@ -4361,8 +4368,12 @@ RegParmMax = 8; MaxAtomicInlineWidth = 128; MaxAtomicPromoteWidth = 128; - + LongDoubleWidth = LongDoubleAlign = 128; + SuitableAlign = 128; + if (getTriple().getOS() == llvm::Triple::IOS) + CompleteObjectAlign = SuitableAlign; + LongDoubleFormat = &llvm::APFloat::IEEEquad; // {} in inline assembly are neon specifiers, not assembly variant Index: lib/CodeGen/CodeGenFunction.h =================================================================== --- lib/CodeGen/CodeGenFunction.h (revision 214123) +++ lib/CodeGen/CodeGenFunction.h (working copy) @@ -1397,10 +1397,32 @@ CGM.getTBAAInfo(T)); } + static bool RestrictedCompleteObjectAlign(QualType T) { + if (const TypedefType *TD = dyn_cast<TypedefType>(T.getTypePtr())) { + if (TypedefNameDecl *Typedef = TD->getDecl()) + return !Typedef->hasAttr<AlignedAttr>(); + return true; + } + // Assume elaborated types (struct, etc.) enforce their own alignment rules. + return !isa<ElaboratedType>(T.getTypePtr()); + } + LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T) { CharUnits Alignment; - if (!T->isIncompleteType()) + if (!T->isIncompleteType()) { Alignment = getContext().getTypeAlignInChars(T); + // For targets with more restrictive alignment for complete objects, + // use the smaller of two alignments (unless type has specified its + // own alignment via aligned attribute). 
+ unsigned CompleteObjectAlign = + getContext().getTargetInfo().getCompleteObjectAlign(); + if (CompleteObjectAlign && RestrictedCompleteObjectAlign(T)) { + CompleteObjectAlign /= getContext().getCharWidth(); + Alignment = CharUnits::fromQuantity( + std::min(unsigned(Alignment.getQuantity()), + unsigned(CompleteObjectAlign))); + } + } return LValue::MakeAddr(V, T, Alignment, getContext(), CGM.getTBAAInfo(T)); } Index: test/CodeGen/arm-arguments.c =================================================================== --- test/CodeGen/arm-arguments.c (revision 214123) +++ test/CodeGen/arm-arguments.c (working copy) @@ -215,11 +215,11 @@ // APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8* // APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]] // APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>* -// APCS-GNU: load <4 x float>* %[[d]], align 16 +// APCS-GNU: load <4 x float>* %[[d]], align 4 // AAPCS-LABEL: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval align 16, %struct.s35* byval align 16) // AAPCS: %[[a:.*]] = alloca %struct.s35, align 16 // AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8* // AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8* // AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]] // AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>* -// AAPCS: load <4 x float>* %[[d]], align 16 +// AAPCS: load <4 x float>* %[[d]], align 8 Index: test/CodeGenCXX/align-avx-complete-objects.cpp =================================================================== --- test/CodeGenCXX/align-avx-complete-objects.cpp (revision 0) +++ test/CodeGenCXX/align-avx-complete-objects.cpp (working copy) @@ -0,0 +1,28 @@ +// RUN: %clang_cc1 -x c++ %s -O0 -triple=x86_64-apple-darwin -target-feature +avx2 -emit-llvm -o - -Werror | FileCheck %s +// rdar://16254558 + +typedef float AVX2Float __attribute__((__vector_size__(32))); + + +volatile float TestAlign(void) +{ + volatile AVX2Float *p = new AVX2Float; + 
*p = *p; + AVX2Float r = *p; + return r[0]; +} + +// CHECK: [[R:%.*]] = alloca <8 x float>, align 32 +// CHECK-NEXT: [[CALL:%.*]] = call noalias i8* @_Znwm(i64 32) +// CHECK-NEXT: [[ZERO:%.*]] = bitcast i8* [[CALL]] to <8 x float>* +// CHECK-NEXT: store <8 x float>* [[ZERO]], <8 x float>** [[P:%.*]], align 8 +// CHECK-NEXT: [[ONE:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: [[TWO:%.*]] = load volatile <8 x float>* [[ONE]], align 16 +// CHECK-NEXT: [[THREE:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: store volatile <8 x float> [[TWO]], <8 x float>* [[THREE]], align 16 +// CHECK-NEXT: [[FOUR:%.*]] = load <8 x float>** [[P]], align 8 +// CHECK-NEXT: [[FIVE:%.*]] = load volatile <8 x float>* [[FOUR]], align 16 +// CHECK-NEXT: store <8 x float> [[FIVE]], <8 x float>* [[R]], align 32 +// CHECK-NEXT: [[SIX:%.*]] = load <8 x float>* [[R]], align 32 +// CHECK-NEXT: [[VECEXT:%.*]] = extractelement <8 x float> [[SIX]], i32 0 +// CHECK-NEXT: ret float [[VECEXT]]