[PATCH] D14596: [SROA] Choose more profitable type in findCommonType
Guozhi Wei via llvm-commits
llvm-commits at lists.llvm.org
Wed Dec 16 14:26:17 PST 2015
Carrot added a comment.
The IR after inlining:
*** IR Dump After Function Integration/Inlining ***
; @_Z3fooi is the Itanium-mangled name of foo(int). This dump is the function
; after inlining and before SROA: two allocas (%ldd, %ref.tmp) are still present.
; The loop runs while %i.0 < %n and accumulates into the two float fields of %ldd.
; Function Attrs: nounwind
define void @_Z3fooi(i32 signext %n) #0 {
entry:
; %ldd is a single 8-byte stack slot declared as i64; %tmpcast is the same
; address viewed as a %"struct.std::complex".
%ldd = alloca i64, align 8
%tmpcast = bitcast i64* %ldd to %"struct.std::complex"*
; Second stack slot, used in for.body to hold the [2 x float] product.
%ref.tmp = alloca %"struct.std::complex", align 4
%0 = bitcast i64* %ldd to i8*
call void @llvm.lifetime.start(i64 8, i8* %0) #3
; Zero both float fields (indices 0,0,0 and 0,0,1) of %ldd through the struct view.
%_M_value.realp.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %tmpcast, i64 0, i32 0, i32 0
store float 0.000000e+00, float* %_M_value.realp.i, align 4
%_M_value2.imagp.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %tmpcast, i64 0, i32 0, i32 1
store float 0.000000e+00, float* %_M_value2.imagp.i, align 4
br label %for.cond
for.cond: ; preds = %for.body, %entry
; Loop header: %i.0 starts at 0 and is compared (signed) against %n.
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, %n
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
; Load the two float fields of each of the globals @dd and @dd2.
%1 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 0), align 4
%2 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1), align 4
%3 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 0), align 4
%4 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 1), align 4
; %sub.i   = %1*%3 - %2*%4
; %add.i.4 = %2*%3 + %1*%4
; (the shape of a complex multiply of @dd by @dd2)
%mul.i = fmul float %1, %3
%mul4.i = fmul float %2, %4
%sub.i = fsub float %mul.i, %mul4.i
%mul5.i = fmul float %2, %3
%mul6.i = fmul float %1, %4
%add.i.4 = fadd float %mul5.i, %mul6.i
; Pack the two results into a [2 x float] aggregate, then unpack it again
; element by element into the two float fields of %ref.tmp.
%.fca.0.insert.i = insertvalue [2 x float] undef, float %sub.i, 0
%.fca.1.insert.i = insertvalue [2 x float] %.fca.0.insert.i, float %add.i.4, 1
%call.fca.0.gep = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i64 0, i32 0, i32 0
%call.fca.0.extract = extractvalue [2 x float] %.fca.1.insert.i, 0
store float %call.fca.0.extract, float* %call.fca.0.gep, align 4
%5 = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i64 0, i32 0, i32 1
%call.fca.1.extract = extractvalue [2 x float] %.fca.1.insert.i, 1
store float %call.fca.1.extract, float* %5, align 4
; First field: %ldd.field0 += %ref.tmp.field0 (float load, fadd, float store).
%_M_value.realp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i64 0, i32 0, i32 0
%6 = load float, float* %_M_value.realp.i.i, align 4
%_M_value.realp.i.3 = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %tmpcast, i64 0, i32 0, i32 0
%7 = load float, float* %_M_value.realp.i.3, align 4
%add.i = fadd float %6, %7
store float %add.i, float* %_M_value.realp.i.3, align 4
; Second field: %ldd.field1 += %ref.tmp.field1.
%_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i64 0, i32 0, i32 1
%8 = load float, float* %_M_value.imagp.i.i, align 4
%_M_value3.imagp.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %tmpcast, i64 0, i32 0, i32 1
%9 = load float, float* %_M_value3.imagp.i, align 4
%add4.i = fadd float %8, %9
store float %add4.i, float* %_M_value3.imagp.i, align 4
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
; The accumulated value is copied to @dd as one 8-byte i64 load/store rather
; than as two floats. This is the only integer-typed access to %ldd in the
; function; every other access above is a float load or store.
; NOTE(review): presumably this mixed i64/float use is what the patch
; discussion is about -- confirm against the review thread.
%10 = load i64, i64* %ldd, align 8
store i64 %10, i64* bitcast (%"struct.std::complex"* @dd to i64*), align 4
call void @llvm.lifetime.end(i64 8, i8* %0) #3
ret void
}
And the IR after the SROA pass:
*** IR Dump After SROA ***
; The same function, @_Z3fooi, after SROA. No alloca remains in this dump:
; the 8-byte %ldd slot is now carried as two i32 values (%ldd.sroa.0.0 and
; %ldd.sroa.6.0) through phis, and %ref.tmp has disappeared entirely.
; Function Attrs: nounwind
define void @_Z3fooi(i32 signext %n) #0 {
entry:
br label %for.cond
for.cond: ; preds = %for.body, %entry
; The two pieces of the former %ldd slot are typed i32, not float. Both start
; at i32 0, which replaces the two `store float 0.0` instructions of the
; pre-SROA IR (0.0f and i32 0 have the same all-zero bit pattern).
%ldd.sroa.0.0 = phi i32 [ 0, %entry ], [ %5, %for.body ]
%ldd.sroa.6.0 = phi i32 [ 0, %entry ], [ %7, %for.body ]
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
%cmp = icmp slt i32 %i.0, %n
br i1 %cmp, label %for.body, label %for.end
for.body: ; preds = %for.cond
; Load the two float fields of each of the globals @dd and @dd2.
%0 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 0), align 4
%1 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1), align 4
%2 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 0), align 4
%3 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 1), align 4
; %sub.i   = %0*%2 - %1*%3
; %add.i.4 = %1*%2 + %0*%3
%mul.i = fmul float %0, %2
%mul4.i = fmul float %1, %3
%sub.i = fsub float %mul.i, %mul4.i
%mul5.i = fmul float %1, %2
%mul6.i = fmul float %0, %3
%add.i.4 = fadd float %mul5.i, %mul6.i
; The [2 x float] aggregate is still built and taken apart, but the extracted
; values now feed the fadds directly instead of going through a stack slot.
%.fca.0.insert.i = insertvalue [2 x float] undef, float %sub.i, 0
%.fca.1.insert.i = insertvalue [2 x float] %.fca.0.insert.i, float %add.i.4, 1
%call.fca.0.extract = extractvalue [2 x float] %.fca.1.insert.i, 0
%call.fca.1.extract = extractvalue [2 x float] %.fca.1.insert.i, 1
; Each accumulator update is now: bitcast i32 -> float, fadd, bitcast
; float -> i32, because the loop-carried phis above are i32. That is two
; bitcasts per accumulator on every iteration.
%4 = bitcast i32 %ldd.sroa.0.0 to float
%add.i = fadd float %call.fca.0.extract, %4
%5 = bitcast float %add.i to i32
%6 = bitcast i32 %ldd.sroa.6.0 to float
%add4.i = fadd float %call.fca.1.extract, %6
%7 = bitcast float %add4.i to i32
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
; The single i64 copy to @dd from the pre-SROA IR has become two i32 stores,
; one to each float field of @dd, through i32* bitcasts of the field addresses.
store i32 %ldd.sroa.0.0, i32* bitcast (%"struct.std::complex"* @dd to i32*), align 4
store i32 %ldd.sroa.6.0, i32* bitcast (float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1) to i32*), align 4
ret void
}
http://reviews.llvm.org/D14596
More information about the llvm-commits
mailing list