[cfe-commits] r167440 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-arguments.c

Tue Nov 6 11:00:47 PST 2012

On Nov 5, 2012, at 8:58 PM, Manman Ren <mren at apple.com> wrote:

> Author: mren
> Date: Mon Nov  5 22:58:01 2012
> New Revision: 167440
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=167440&view=rev
> Log:
> ARM byval: when type alignment is bigger than ABI alignment, instead of
> disabling byval, we set realign to true.
> 
> It will perform an aligned alloca, and call memcpy to copy the byval
> argument to the local variable.
> Change the size threshold back to 64 bytes.
> 
> rdar://12596507
> 
> Modified:
>    cfe/trunk/lib/CodeGen/TargetInfo.cpp
>    cfe/trunk/test/CodeGen/arm-arguments.c
> 
> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167440&r1=167439&r2=167440&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon Nov  5 22:58:01 2012
> @@ -3221,16 +3221,15 @@
> 
>   // Support byval for ARM.
>   // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at most 8-byte.
> -  // Byval can't handle the case where type alignment is bigger than ABI alignment.
> -  // We also increase the threshold for byval due to its overhead.
> +  // We realign the indirect argument if type alignment is bigger than ABI alignment.

That newly added comment line doesn't fit in 80 columns, does it?

>   uint64_t ABIAlign = 4;
>   uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
>   if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
>       getABIKind() == ARMABIInfo::AAPCS)
>     ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
> -  if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64*8) &&
> -      TyAlign <= ABIAlign) {
> -    return ABIArgInfo::getIndirect(0, /*ByVal=*/true);
> +  if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
> +    return ABIArgInfo::getIndirect(0, /*ByVal=*/true,
> +           /*Realign=*/TyAlign <= ABIAlign ? false : true);

Isn't that the same as "/*Realign=*/TyAlign > ABIAlign"? The "? false : true" ternary looks redundant.

>   }
> 
>   // Otherwise, pass by coercing to a structure of the appropriate size.
> 
> Modified: cfe/trunk/test/CodeGen/arm-arguments.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-arguments.c?rev=167440&r1=167439&r2=167440&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/arm-arguments.c (original)
> +++ cfe/trunk/test/CodeGen/arm-arguments.c Mon Nov  5 22:58:01 2012
> @@ -191,3 +191,35 @@
> // AAPCS: %[[a:.*]] = alloca { [1 x i32] }
> // AAPCS: %[[gep:.*]] = getelementptr { [1 x i32] }* %[[a]], i32 0, i32 0
> // AAPCS: load [1 x i32]* %[[gep]]
> +
> +// rdar://12596507
> +struct s35
> +{
> +   float v[18]; //make sure byval is on.
> +} __attribute__((aligned(16)));
> +typedef struct s35 s35_with_align;
> +
> +typedef __attribute__((neon_vector_type(4))) float float32x4_t;
> +static __attribute__((__always_inline__, __nodebug__)) float32x4_t vaddq_f32(
> +       float32x4_t __a, float32x4_t __b) {
> + return __a + __b;
> +}
> +float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
> +  float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
> +                            *(float32x4_t *)&s2);
> +  return v;
> +}
> +// APCS-GNU: define <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval)
> +// APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16
> +// APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
> +// APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
> +// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
> +// APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
> +// APCS-GNU: load <4 x float>* %[[d]], align 16
> +// AAPCS: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval)
> +// AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
> +// AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
> +// AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
> +// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
> +// AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
> +// AAPCS: load <4 x float>* %[[d]], align 16
> 
> 
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits