[PATCH] D14596: [SROA] Choose more profitable type in findCommonType
Guozhi Wei via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 15 17:38:15 PST 2015
Carrot added a comment.
The following is the IR of function foo in David's test case, as it stands after the last pass that still has the correct type for %ldd:
; Function @_Z3fooi (the "foo" of the test case): takes a sign-extended i32 %n.
; In this dump %ldd is accessed only through calls, lifetime markers and the
; final memcpy; no load/store in this function touches %ldd directly.
define void @_Z3fooi(i32 signext %n) #0 {
entry:
; Two stack slots of type %"struct.std::complex", each 4-byte aligned.
%ldd = alloca %"struct.std::complex", align 4
%ref.tmp = alloca %"struct.std::complex", align 4
; i8* view of %ldd, reused below by the lifetime markers and the memcpy.
%0 = bitcast %"struct.std::complex"* %ldd to i8*
call void @llvm.lifetime.start(i64 8, i8* %0) #3
; Initializes %ldd through an out-of-line call with two 0.0 float arguments
; (mangled name suggests the std::complex<float>(float, float) constructor).
call void @_ZNSt7complexIfEC2Eff(%"struct.std::complex"* %ldd, float 0.000000e+00, float 0.000000e+00)
br label %for.cond
; Loop header: %i.0 starts at 0 and the body runs while %i.0 < %n (signed compare).
for.cond: ; preds = %for.body, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, %n
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
; The callee returns its result as a [2 x float] aggregate.
%call = call fastcc [2 x float] @_ZL7computeRSt7complexIfES1_()
; Reinterpret the first field of %ref.tmp ({ float, float }) as [2 x float]
; so the two returned elements can be stored into it one at a time.
%coerce.dive = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i32 0, i32 0
%1 = bitcast { float, float }* %coerce.dive to [2 x float]*
; Element 0 of the returned aggregate -> element 0 of %ref.tmp.
%call.fca.0.gep = getelementptr inbounds [2 x float], [2 x float]* %1, i32 0, i32 0
%call.fca.0.extract = extractvalue [2 x float] %call, 0
store float %call.fca.0.extract, float* %call.fca.0.gep
; Element 1 of the returned aggregate -> element 1 of %ref.tmp.
%call.fca.1.gep = getelementptr inbounds [2 x float], [2 x float]* %1, i32 0, i32 1
%call.fca.1.extract = extractvalue [2 x float] %call, 1
store float %call.fca.1.extract, float* %call.fca.1.gep
; Out-of-line call taking %ldd and %ref.tmp (mangled name suggests
; std::complex<float>::operator+=); the returned pointer %call1 is unused here.
%call1 = call dereferenceable(8) %"struct.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"struct.std::complex"* %ldd, %"struct.std::complex"* dereferenceable(8) %ref.tmp)
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
; Copy the 8 bytes of %ldd into the global @dd through i8* pointers; this
; memcpy is the only whole-object access to %ldd in the function.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* bitcast (%"struct.std::complex"* @dd to i8*), i8* %0, i64 8, i32 4, i1 false)
call void @llvm.lifetime.end(i64 8, i8* %0) #3
ret void
}
In this function there are no explicit floating point operations; all of them are wrapped in function calls. So the combine pass actually works as intended: the memcpy is lowered to an integer ld/st. But later, in the inlining pass, all the complex number function calls are inlined, so the SROA pass sees both integer ld/st and fp operations.
http://reviews.llvm.org/D14596
More information about the llvm-commits
mailing list