[cfe-commits] r167440 - in /cfe/trunk: lib/CodeGen/TargetInfo.cpp test/CodeGen/arm-arguments.c

Tue Nov 6 11:06:44 PST 2012

Sorry, I just re-installed the OS and forgot to enforce the check.

Fixed in r167476.

Manman

On Nov 6, 2012, at 11:00 AM, Bob Wilson wrote:

> 
> On Nov 5, 2012, at 8:58 PM, Manman Ren <mren at apple.com> wrote:
> 
>> Author: mren
>> Date: Mon Nov  5 22:58:01 2012
>> New Revision: 167440
>> 
>> URL: http://llvm.org/viewvc/llvm-project?rev=167440&view=rev
>> Log:
>> ARM byval: when type alignment is bigger than ABI alignment, instead of
>> disabling byval, we set realign to true.
>> 
>> It will perform an aligned alloca, and call memcpy to copy the byval
>> argument to the local variable.
>> Change the size threshold back to 64 bytes.
>> 
>> rdar://12596507
>> 
>> Modified:
>>   cfe/trunk/lib/CodeGen/TargetInfo.cpp
>>   cfe/trunk/test/CodeGen/arm-arguments.c
>> 
>> Modified: cfe/trunk/lib/CodeGen/TargetInfo.cpp
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/TargetInfo.cpp?rev=167440&r1=167439&r2=167440&view=diff
>> ==============================================================================
>> --- cfe/trunk/lib/CodeGen/TargetInfo.cpp (original)
>> +++ cfe/trunk/lib/CodeGen/TargetInfo.cpp Mon Nov  5 22:58:01 2012
>> @@ -3221,16 +3221,15 @@
>> 
>>  // Support byval for ARM.
>>  // The ABI alignment for APCS is 4-byte and for AAPCS at least 4-byte and at most 8-byte.
>> -  // Byval can't handle the case where type alignment is bigger than ABI alignment.
>> -  // We also increase the threshold for byval due to its overhead.
>> +  // We realign the indirect argument if type alignment is bigger than ABI alignment.
> 
> That doesn't fit in 80 columns, does it?
> 
>>  uint64_t ABIAlign = 4;
>>  uint64_t TyAlign = getContext().getTypeAlign(Ty) / 8;
>>  if (getABIKind() == ARMABIInfo::AAPCS_VFP ||
>>      getABIKind() == ARMABIInfo::AAPCS)
>>    ABIAlign = std::min(std::max(TyAlign, (uint64_t)4), (uint64_t)8);
>> -  if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64*8) &&
>> -      TyAlign <= ABIAlign) {
>> -    return ABIArgInfo::getIndirect(0, /*ByVal=*/true);
>> +  if (getContext().getTypeSizeInChars(Ty) > CharUnits::fromQuantity(64)) {
>> +    return ABIArgInfo::getIndirect(0, /*ByVal=*/true,
>> +           /*Realign=*/TyAlign <= ABIAlign ? false : true);
> 
> Isn't that the same as TyAlign > ABIAlign?
> 
>>  }
>> 
>>  // Otherwise, pass by coercing to a structure of the appropriate size.
>> 
>> Modified: cfe/trunk/test/CodeGen/arm-arguments.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/arm-arguments.c?rev=167440&r1=167439&r2=167440&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/arm-arguments.c (original)
>> +++ cfe/trunk/test/CodeGen/arm-arguments.c Mon Nov  5 22:58:01 2012
>> @@ -191,3 +191,35 @@
>> // AAPCS: %[[a:.*]] = alloca { [1 x i32] }
>> // AAPCS: %[[gep:.*]] = getelementptr { [1 x i32] }* %[[a]], i32 0, i32 0
>> // AAPCS: load [1 x i32]* %[[gep]]
>> +
>> +// rdar://12596507
>> +struct s35
>> +{
>> +   float v[18]; //make sure byval is on.
>> +} __attribute__((aligned(16)));
>> +typedef struct s35 s35_with_align;
>> +
>> +typedef __attribute__((neon_vector_type(4))) float float32x4_t;
>> +static __attribute__((__always_inline__, __nodebug__)) float32x4_t vaddq_f32(
>> +       float32x4_t __a, float32x4_t __b) {
>> + return __a + __b;
>> +}
>> +float32x4_t f35(int i, s35_with_align s1, s35_with_align s2) {
>> +  float32x4_t v = vaddq_f32(*(float32x4_t *)&s1,
>> +                            *(float32x4_t *)&s2);
>> +  return v;
>> +}
>> +// APCS-GNU: define <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval)
>> +// APCS-GNU: %[[a:.*]] = alloca %struct.s35, align 16
>> +// APCS-GNU: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
>> +// APCS-GNU: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
>> +// APCS-GNU: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
>> +// APCS-GNU: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
>> +// APCS-GNU: load <4 x float>* %[[d]], align 16
>> +// AAPCS: define arm_aapcscc <4 x float> @f35(i32 %i, %struct.s35* byval, %struct.s35* byval)
>> +// AAPCS: %[[a:.*]] = alloca %struct.s35, align 16
>> +// AAPCS: %[[b:.*]] = bitcast %struct.s35* %[[a]] to i8*
>> +// AAPCS: %[[c:.*]] = bitcast %struct.s35* %0 to i8*
>> +// AAPCS: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[b]], i8* %[[c]]
>> +// AAPCS: %[[d:.*]] = bitcast %struct.s35* %[[a]] to <4 x float>*
>> +// AAPCS: load <4 x float>* %[[d]], align 16
>> 
>> 
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>