[LLVMdev] [NVPTX] llc -march=nvptx64 -mcpu=sm_20 generates invalid zero align for device function params
Dmitry N. Mikushin
maemarcus at gmail.com
Tue Jul 10 18:46:44 PDT 2012
Hello,
FYI, this has been filed as a bug: http://llvm.org/bugs/show_bug.cgi?id=13324
When the following code is compiled for sm_20, the byval function parameters
are for some reason emitted with ".align 0", which is invalid (ptxas requires
the alignment to be a power of two). The problem does not occur when compiling
for sm_10.
> cat test.ll
; ModuleID = '__kernelgen_main_module'
target datalayout = "e-p:64:64-i64:64:64-f64:64:64-n1:8:16:32:64"
target triple = "ptx64-unknown-unknown"
%struct.float2 = type { float, float }
define ptx_device void @__internal_dsmul(%struct.float2* noalias nocapture
sret %agg.result, %struct.float2* nocapture byval %x, %struct.float2*
nocapture byval %y) nounwind inlinehint alwaysinline {
entry:
%y1 = getelementptr inbounds %struct.float2* %x, i64 0, i32 1
%0 = load float* %y1, align 4
%sub = fsub float -0.000000e+00, %0
%1 = tail call float asm "mad.f32 $0, $1, $2, $3;", "=f,f,f,f"(float
%sub, float 4.097000e+03, float %0) nounwind
%2 = tail call float asm "mad.f32 $0, $1, $2, $3;", "=f,f,f,f"(float %0,
float 4.097000e+03, float %1) nounwind
%y5 = getelementptr inbounds %struct.float2* %y, i64 0, i32 1
%3 = load float* %y5, align 4
%sub7 = fsub float -0.000000e+00, %3
%4 = tail call float asm "mad.f32 $0, $1, $2, $3;", "=f,f,f,f"(float
%sub7, float 4.097000e+03, float %3) nounwind
%5 = tail call float asm "mad.f32 $0, $1, $2, $3;", "=f,f,f,f"(float %3,
float 4.097000e+03, float %4) nounwind
%sub12 = fsub float %0, %2
%sub14 = fsub float %3, %5
%6 = tail call float @llvm.nvvm.mul.rn.f(float %0, float %3) nounwind
%sub18 = fsub float -0.000000e+00, %6
%7 = tail call float asm "mad.f32 $0, $1, $2, $3;", "=f,f,f,f"(float %2,
float %5, float %sub18) nounwind
%8 = tail call float asm "mad.f32 $0, $1, $2, $3;", "=f,f,f,f"(float %2,
float %sub14, float %7) nounwind
%9 = tail call float asm "mad.f32 $0, $1, $2, $3;", "=f,f,f,f"(float %5,
float %sub12, float %8) nounwind
%10 = tail call float asm "mad.f32 $0, $1, $2, $3;", "=f,f,f,f"(float
%sub12, float %sub14, float %9) nounwind
%x24 = getelementptr inbounds %struct.float2* %y, i64 0, i32 0
%11 = load float* %x24, align 4
%12 = tail call float @llvm.nvvm.mul.rn.f(float %0, float %11) nounwind
%x26 = getelementptr inbounds %struct.float2* %x, i64 0, i32 0
%13 = load float* %x26, align 4
%14 = tail call float @llvm.nvvm.mul.rn.f(float %13, float %3) nounwind
%add = fadd float %12, %14
%add29 = fadd float %10, %add
%15 = tail call float @llvm.nvvm.add.rn.f(float %6, float %add29) nounwind
%sub32 = fsub float %6, %15
%16 = tail call float @llvm.nvvm.add.rn.f(float %sub32, float %add29)
nounwind
%agg.result.0 = getelementptr inbounds %struct.float2* %agg.result, i64
0, i32 0
store float %16, float* %agg.result.0, align 8
%agg.result.1 = getelementptr inbounds %struct.float2* %agg.result, i64
0, i32 1
store float %15, float* %agg.result.1, align 4
ret void
}
declare ptx_device float @llvm.nvvm.add.rn.f(float, float) nounwind readnone
declare ptx_device float @llvm.nvvm.mul.rn.f(float, float) nounwind readnone
> llc -march=nvptx64 -mcpu=sm_20 test.ll -o test.ptx
> cat test.ptx
//
// Generated by LLVM NVPTX Back-End
//
.version 3.0
.target sm_20, texmode_independent
.address_size 64
// .globl __internal_dsmul
.func __internal_dsmul(
.param .b64 __internal_dsmul_param_0,
.param .align 0 .b8 __internal_dsmul_param_1[8],
.param .align 0 .b8 __internal_dsmul_param_2[8]
) // @__internal_dsmul
{
.reg .pred %p<396>;
.reg .s16 %rc<396>;
.reg .s16 %rs<396>;
.reg .s32 %r<396>;
.reg .s64 %rl<396>;
.reg .f32 %f<396>;
.reg .f64 %fl<396>;
// BB#0: // %entry
mov.b64 %rl0, __internal_dsmul_param_1;
cvta.local.u64 %rl0, %rl0;
ld.f32 %f0, [%rl0+4];
neg.f32 %f1, %f0;
mov.b64 %rl1, __internal_dsmul_param_2;
mov.f32 %f2, 0f45800800;
// inline asm
mad.f32 %f1, %f1, %f2, %f0;
// inline asm
// inline asm
mad.f32 %f3, %f0, %f2, %f1;
// inline asm
cvta.local.u64 %rl1, %rl1;
ld.f32 %f1, [%rl1+4];
neg.f32 %f4, %f1;
// inline asm
mad.f32 %f4, %f4, %f2, %f1;
// inline asm
// inline asm
mad.f32 %f4, %f1, %f2, %f4;
// inline asm
sub.f32 %f5, %f0, %f3;
sub.f32 %f6, %f1, %f4;
mul.rn.f32 %f2, %f0, %f1;
neg.f32 %f7, %f2;
// inline asm
mad.f32 %f7, %f3, %f4, %f7;
// inline asm
// inline asm
mad.f32 %f3, %f3, %f6, %f7;
// inline asm
// inline asm
mad.f32 %f3, %f4, %f5, %f3;
// inline asm
// inline asm
mad.f32 %f3, %f5, %f6, %f3;
// inline asm
ld.f32 %f4, [%rl1];
mul.rn.f32 %f0, %f0, %f4;
ld.f32 %f4, [%rl0];
mul.rn.f32 %f1, %f4, %f1;
add.f32 %f0, %f0, %f1;
add.f32 %f1, %f3, %f0;
add.rn.f32 %f0, %f2, %f1;
sub.f32 %f2, %f2, %f0;
add.rn.f32 %f1, %f2, %f1;
ld.param.u64 %rl0, [__internal_dsmul_param_0];
st.f32 [%rl0], %f1;
st.f32 [%rl0+4], %f0;
ret;
}
> ptxas -arch=sm_20 test.ptx -o ptx.cubin
ptxas test.ptx, line 13; error : Alignment must be a power of two
ptxas test.ptx, line 14; error : Alignment must be a power of two
ptxas fatal : Ptx assembly aborted due to errors
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20120711/e55c1cd9/attachment.html>
More information about the llvm-dev
mailing list