[PATCH] D14596: [SROA] Choose more profitable type in findCommonType

Guozhi Wei via llvm-commits llvm-commits at lists.llvm.org
Wed Dec 16 14:26:17 PST 2015


Carrot added a comment.

The IR after inlining:

*** IR Dump After Function Integration/Inlining ***

; Function Attrs: nounwind
define void @_Z3fooi(i32 signext %n) #0 {
entry:

  %ldd = alloca i64, align 8
  %tmpcast = bitcast i64* %ldd to %"struct.std::complex"*
  %ref.tmp = alloca %"struct.std::complex", align 4
  %0 = bitcast i64* %ldd to i8*
  call void @llvm.lifetime.start(i64 8, i8* %0) #3
  %_M_value.realp.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %tmpcast, i64 0, i32 0, i32 0 
  store float 0.000000e+00, float* %_M_value.realp.i, align 4
  %_M_value2.imagp.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %tmpcast, i64 0, i32 0, i32 1 
  store float 0.000000e+00, float* %_M_value2.imagp.i, align 4
  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry

  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond

  %1 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 0), align 4
  %2 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1), align 4
  %3 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 0), align 4
  %4 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 1), align 4
  %mul.i = fmul float %1, %3
  %mul4.i = fmul float %2, %4
  %sub.i = fsub float %mul.i, %mul4.i
  %mul5.i = fmul float %2, %3
  %mul6.i = fmul float %1, %4
  %add.i.4 = fadd float %mul5.i, %mul6.i
  %.fca.0.insert.i = insertvalue [2 x float] undef, float %sub.i, 0
  %.fca.1.insert.i = insertvalue [2 x float] %.fca.0.insert.i, float %add.i.4, 1
  %call.fca.0.gep = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i64 0, i32 0, i32 0 
  %call.fca.0.extract = extractvalue [2 x float] %.fca.1.insert.i, 0
  store float %call.fca.0.extract, float* %call.fca.0.gep, align 4
  %5 = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i64 0, i32 0, i32 1 
  %call.fca.1.extract = extractvalue [2 x float] %.fca.1.insert.i, 1
  store float %call.fca.1.extract, float* %5, align 4
  %_M_value.realp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i64 0, i32 0, i32 0 
  %6 = load float, float* %_M_value.realp.i.i, align 4
  %_M_value.realp.i.3 = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %tmpcast, i64 0, i32 0, i32 0 
  %7 = load float, float* %_M_value.realp.i.3, align 4
  %add.i = fadd float %6, %7
  store float %add.i, float* %_M_value.realp.i.3, align 4
  %_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %ref.tmp, i64 0, i32 0, i32 1 
  %8 = load float, float* %_M_value.imagp.i.i, align 4
  %_M_value3.imagp.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %tmpcast, i64 0, i32 0, i32 1 
  %9 = load float, float* %_M_value3.imagp.i, align 4
  %add4.i = fadd float %8, %9
  store float %add4.i, float* %_M_value3.imagp.i, align 4
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond

  %10 = load i64, i64* %ldd, align 8
  store i64 %10, i64* bitcast (%"struct.std::complex"* @dd to i64*), align 4
  call void @llvm.lifetime.end(i64 8, i8* %0) #3
  ret void

}

And the IR after the SROA pass:

*** IR Dump After SROA ***

; Function Attrs: nounwind
define void @_Z3fooi(i32 signext %n) #0 {
entry:

  br label %for.cond

for.cond:                                         ; preds = %for.body, %entry

  %ldd.sroa.0.0 = phi i32 [ 0, %entry ], [ %5, %for.body ]
  %ldd.sroa.6.0 = phi i32 [ 0, %entry ], [ %7, %for.body ]
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %cmp = icmp slt i32 %i.0, %n
  br i1 %cmp, label %for.body, label %for.end

for.body:                                         ; preds = %for.cond

  %0 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 0), align 4
  %1 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1), align 4
  %2 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 0), align 4
  %3 = load float, float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd2, i64 0, i32 0, i32 1), align 4
  %mul.i = fmul float %0, %2
  %mul4.i = fmul float %1, %3
  %sub.i = fsub float %mul.i, %mul4.i
  %mul5.i = fmul float %1, %2
  %mul6.i = fmul float %0, %3
  %add.i.4 = fadd float %mul5.i, %mul6.i
  %.fca.0.insert.i = insertvalue [2 x float] undef, float %sub.i, 0
  %.fca.1.insert.i = insertvalue [2 x float] %.fca.0.insert.i, float %add.i.4, 1
  %call.fca.0.extract = extractvalue [2 x float] %.fca.1.insert.i, 0
  %call.fca.1.extract = extractvalue [2 x float] %.fca.1.insert.i, 1
  %4 = bitcast i32 %ldd.sroa.0.0 to float 
  %add.i = fadd float %call.fca.0.extract, %4
  %5 = bitcast float %add.i to i32
  %6 = bitcast i32 %ldd.sroa.6.0 to float 
  %add4.i = fadd float %call.fca.1.extract, %6
  %7 = bitcast float %add4.i to i32
  %inc = add nsw i32 %i.0, 1
  br label %for.cond

for.end:                                          ; preds = %for.cond

  store i32 %ldd.sroa.0.0, i32* bitcast (%"struct.std::complex"* @dd to i32*), align 4
  store i32 %ldd.sroa.6.0, i32* bitcast (float* getelementptr inbounds (%"struct.std::complex", %"struct.std::complex"* @dd, i64 0, i32 0, i32 1) to i32*), align 4
  ret void

}


http://reviews.llvm.org/D14596





More information about the llvm-commits mailing list