[LLVMdev] 64bit MRV problem: { float, float, float} -> { double, float }

Ralf Karrenberg Chareos at gmx.de
Fri Jan 29 05:58:35 PST 2010


Hey Duncan, hey everybody else,

I just stumbled upon a problem in the latest llvm-gcc trunk which is
related to my previous problem with the 64bit ABI and structs:

Given the following code:

struct float3 { float x, y, z; };
extern "C" void __attribute__((noinline)) test(float3 a, float3* res) {
    res->y = a.y;
}
int main(void) {
    float3 a;
    float3 res;
    test(a, &res);
}


llvm-gcc -c -emit-llvm -O3 produces this:

%struct.float3 = type { float, float, float }
define void @test(double %a.0, float %a.1, %struct.float3* nocapture
%res) nounwind noinline {
entry:
  %tmp8 = bitcast double %a.0 to i64              ; <i64> [#uses=1]
  %tmp9 = zext i64 %tmp8 to i96                   ; <i96> [#uses=1]
  %tmp1 = lshr i96 %tmp9, 32                      ; <i96> [#uses=1]
  %tmp2 = trunc i96 %tmp1 to i32                  ; <i32> [#uses=1]
  %tmp3 = bitcast i32 %tmp2 to float              ; <float> [#uses=1]
  %0 = getelementptr inbounds %struct.float3* %res, i64 0, i32 1 ;
<float*> [#uses=1]
  store float %tmp3, float* %0, align 4
  ret void
}
define i32 @main() nounwind {
entry:
  %res = alloca %struct.float3, align 8           ; <%struct.float3*>
[#uses=1]
  call void @test(double undef, float 0.000000e+00, %struct.float3*
%res) nounwind
  ret i32 0
}

The former second value of the struct (y, now packed into the upper 32
bits of the double %a.0) is extracted by bitcasting the double to i64,
zero-extending, shifting, truncating and bitcasting back to float.
Unfortunately, in my case (with more complex functions, of course), LLVM
seems to be unable to remove this kind of code even after the function
has been inlined and optimized. I end up with functions like this one:

define void @xyz(float %aX, float %aY, float %aZ, float* noalias
nocapture %resX, float* noalias nocapture %resY, float* noalias
nocapture %resZ) nounwind {
entry:
  %0 = fadd float %aZ, 5.000000e-01          ; <float> [#uses=1]
  %1 = fadd float %aY, 5.000000e-01          ; <float> [#uses=1]
  %2 = fadd float %aX, 5.000000e-01          ; <float> [#uses=1]
  %tmp16.i.i = bitcast float %1 to i32            ; <i32> [#uses=1]
  %tmp17.i.i = zext i32 %tmp16.i.i to i96         ; <i96> [#uses=1]
  %tmp18.i.i = shl i96 %tmp17.i.i, 32             ; <i96> [#uses=1]
  %tmp19.i = zext i96 %tmp18.i.i to i128          ; <i128> [#uses=1]
  %tmp8.i = lshr i128 %tmp19.i, 32                ; <i128> [#uses=1]
  %tmp9.i = trunc i128 %tmp8.i to i32             ; <i32> [#uses=1]
  %tmp10.i = bitcast i32 %tmp9.i to float         ; <float> [#uses=1]
  store float %2, float* %resX, align 4
  store float %tmp10.i, float* %resY, align 4
  store float %0, float* %resZ, align 4
  ret void
}


llvm-gcc4.2-2.5 generates the following code for the same example:

define void @test(double %a.0, float %a.1, %struct.float3* nocapture
%res) nounwind noinline {
entry:
  %a_addr = alloca %struct.float3, align 8        ; <%struct.float3*>
[#uses=3]
  %0 = bitcast %struct.float3* %a_addr to double* ; <double*> [#uses=1]
  store double %a.0, double* %0
  %1 = getelementptr %struct.float3* %a_addr, i64 0, i32 2 ; <float*>
[#uses=1]
  store float %a.1, float* %1, align 8
  %2 = getelementptr %struct.float3* %a_addr, i64 0, i32 1 ; <float*>
[#uses=1]
  %3 = load float* %2, align 4                    ; <float> [#uses=1]
  %4 = getelementptr %struct.float3* %res, i64 0, i32 1 ; <float*> [#uses=1]
  store float %3, float* %4, align 4
  ret void
}

Apparently, the optimizer copes better with that code: after inlining,
it all goes away as expected.


Is this change intentional?
Any ideas where that code comes from or why it cannot be removed?


Best,
Ralf




More information about the llvm-dev mailing list