[LLVMdev] instcombine adds alignment info

Thu Jun 18 07:21:41 PDT 2015

Is it correct behavior that 'instcombine' not only combines instructions but also adds alignment information, and if so, why? (In my case I wish it wouldn't do that.) The input and output modules are attached.

Thanks,
Frank

; ModuleID = '<stdin>'

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

target triple = "x86_64-unknown-linux-gnu"

; @main (input to opt): vector add over three noalias float buffers.
;
; For each of four groups, at float offsets 0, 4, 8 and 12, it loads a
; <4 x float> from %arg1 and from %arg2, adds them, and stores the sum to
; %arg0 at the same offset (16 floats per buffer in total).
;
; Every pointer reaches its <4 x float>* type through an intermediate
; <2 x float>* bitcast, and none of the loads or stores below carries an
; explicit `align`.
;
; NOTE(review): per the LLVM LangRef, a load/store with no `align` is treated
; as having the ABI alignment of the accessed type (presumably 16 bytes for
; <4 x float> under this datalayout) -- confirm for this LLVM version. If the
; buffers are only float-aligned, the accesses would need an explicit
; `align 4`.
define void @main(float* noalias %arg0, float* noalias %arg1, float* noalias %arg2) {

entrypoint:

   ; --- Group 0: floats [0..3] --------------------------------------------
   ; Cast chain float* -> <2 x float>* -> <4 x float>* for the two sources.
   %0 = bitcast float* %arg1 to <2 x float>*

   %1 = bitcast float* %arg2 to <2 x float>*

   %2 = bitcast <2 x float>* %0 to <4 x float>*

   %3 = bitcast <2 x float>* %1 to <4 x float>*

   %4 = bitcast float* %arg0 to <2 x float>*

   ; No `align` on these loads (see NOTE(review) in the header).
   %5 = load <4 x float>* %2

   %6 = load <4 x float>* %3

   ; %7 = arg2[0..3] + arg1[0..3]
   %7 = fadd <4 x float> %6, %5

   %8 = bitcast <2 x float>* %4 to <4 x float>*

   ; No `align` on this store either.
   store <4 x float> %7, <4 x float>* %8

   ; --- Group 1: floats [4..7] --------------------------------------------
   ; GEP indices are in units of float and typed i32 in this input.
   %9 = getelementptr float* %arg1, i32 4

   %10 = bitcast float* %9 to <2 x float>*

   %11 = getelementptr float* %arg2, i32 4

   %12 = bitcast float* %11 to <2 x float>*

   %13 = getelementptr float* %arg0, i32 4

   %14 = bitcast <2 x float>* %10 to <4 x float>*

   %15 = bitcast <2 x float>* %12 to <4 x float>*

   %16 = bitcast float* %13 to <2 x float>*

   %17 = load <4 x float>* %14

   %18 = load <4 x float>* %15

   ; %19 = arg2[4..7] + arg1[4..7]
   %19 = fadd <4 x float> %18, %17

   %20 = bitcast <2 x float>* %16 to <4 x float>*

   store <4 x float> %19, <4 x float>* %20

   ; --- Group 2: floats [8..11] -------------------------------------------
   %21 = getelementptr float* %arg1, i32 8

   %22 = bitcast float* %21 to <2 x float>*

   %23 = getelementptr float* %arg2, i32 8

   %24 = bitcast float* %23 to <2 x float>*

   %25 = getelementptr float* %arg0, i32 8

   %26 = bitcast <2 x float>* %22 to <4 x float>*

   %27 = bitcast <2 x float>* %24 to <4 x float>*

   %28 = bitcast float* %25 to <2 x float>*

   %29 = load <4 x float>* %26

   %30 = load <4 x float>* %27

   ; %31 = arg2[8..11] + arg1[8..11]
   %31 = fadd <4 x float> %30, %29

   %32 = bitcast <2 x float>* %28 to <4 x float>*

   store <4 x float> %31, <4 x float>* %32

   ; --- Group 3: floats [12..15] ------------------------------------------
   %33 = getelementptr float* %arg1, i32 12

   %34 = bitcast float* %33 to <2 x float>*

   %35 = getelementptr float* %arg2, i32 12

   %36 = bitcast float* %35 to <2 x float>*

   %37 = getelementptr float* %arg0, i32 12

   %38 = bitcast <2 x float>* %34 to <4 x float>*

   %39 = bitcast <2 x float>* %36 to <4 x float>*

   %40 = bitcast float* %37 to <2 x float>*

   %41 = load <4 x float>* %38

   %42 = load <4 x float>* %39

   ; %43 = arg2[12..15] + arg1[12..15]
   %43 = fadd <4 x float> %42, %41

   %44 = bitcast <2 x float>* %40 to <4 x float>*

   store <4 x float> %43, <4 x float>* %44

   ret void

}

Output after running `llvm-3.6/bin/opt -instcombine -S < vec_add.ll`:

; ModuleID = '<stdin>'

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

target triple = "x86_64-unknown-linux-gnu"

; @main as printed by `opt -instcombine -S` (LLVM 3.6) for the input module.
;
; It performs the same four <4 x float> load/load/fadd/store groups at float
; offsets 0, 4, 8 and 12. Visible differences from the input:
;   * each float* is bitcast directly to <4 x float>*; the intermediate
;     <2 x float>* casts are gone;
;   * getelementptr indices are typed i64 instead of i32;
;   * every load and store now carries an explicit `align 16`.
;
; NOTE(review): the `align 16` is presumably the ABI alignment of
; <4 x float> that the LangRef says an alignment-less load/store already
; implies, i.e. the pass spells out what the input already meant rather than
; adding a new assumption -- confirm against the LangRef and the datalayout
; defaults for this LLVM version.
define void @main(float* noalias %arg0, float* noalias %arg1, float* noalias %arg2) {

entrypoint:

   ; --- Group 0: floats [0..3] --------------------------------------------
   %0 = bitcast float* %arg1 to <4 x float>*

   %1 = bitcast float* %arg2 to <4 x float>*

   %2 = load <4 x float>* %0, align 16

   %3 = load <4 x float>* %1, align 16

   ; %4 = arg2[0..3] + arg1[0..3]
   %4 = fadd <4 x float> %3, %2

   %5 = bitcast float* %arg0 to <4 x float>*

   store <4 x float> %4, <4 x float>* %5, align 16

   ; --- Group 1: floats [4..7] --------------------------------------------
   ; Offsets are in units of float; the index type here is i64.
   %6 = getelementptr float* %arg1, i64 4

   %7 = getelementptr float* %arg2, i64 4

   %8 = getelementptr float* %arg0, i64 4

   %9 = bitcast float* %6 to <4 x float>*

   %10 = bitcast float* %7 to <4 x float>*

   %11 = load <4 x float>* %9, align 16

   %12 = load <4 x float>* %10, align 16

   ; %13 = arg2[4..7] + arg1[4..7]
   %13 = fadd <4 x float> %12, %11

   %14 = bitcast float* %8 to <4 x float>*

   store <4 x float> %13, <4 x float>* %14, align 16

   ; --- Group 2: floats [8..11] -------------------------------------------
   %15 = getelementptr float* %arg1, i64 8

   %16 = getelementptr float* %arg2, i64 8

   %17 = getelementptr float* %arg0, i64 8

   %18 = bitcast float* %15 to <4 x float>*

   %19 = bitcast float* %16 to <4 x float>*

   %20 = load <4 x float>* %18, align 16

   %21 = load <4 x float>* %19, align 16

   ; %22 = arg2[8..11] + arg1[8..11]
   %22 = fadd <4 x float> %21, %20

   %23 = bitcast float* %17 to <4 x float>*

   store <4 x float> %22, <4 x float>* %23, align 16

   ; --- Group 3: floats [12..15] ------------------------------------------
   %24 = getelementptr float* %arg1, i64 12

   %25 = getelementptr float* %arg2, i64 12

   %26 = getelementptr float* %arg0, i64 12

   %27 = bitcast float* %24 to <4 x float>*

   %28 = bitcast float* %25 to <4 x float>*

   %29 = load <4 x float>* %27, align 16

   %30 = load <4 x float>* %28, align 16

   ; %31 = arg2[12..15] + arg1[12..15]
   %31 = fadd <4 x float> %30, %29

   %32 = bitcast float* %26 to <4 x float>*

   store <4 x float> %31, <4 x float>* %32, align 16

   ret void

}