[LLVMdev] [NVPTX] [PATCH] CUDA inline PTX asm definitions scoping "{" "}" is broken

Dmitry N. Mikushin maemarcus at gmail.com
Tue Jul 10 16:53:08 PDT 2012


Let me propose a fix:

--- a/llvm/tools/clang/lib/Basic/Targets.cpp    (revision 157736)
+++ b/llvm/tools/clang/lib/Basic/Targets.cpp    (working copy)
@@ -966,6 +966,10 @@
       AddrSpaceMap = &NVPTXAddrSpaceMap;
       // Define available target features
       // These must be defined in sorted order!
+
+      // {} in inline assembly are local scope specifiers, not assembly variant
+      // specifiers.
+      NoAsmVariants = true;
     }
     virtual void getTargetDefines(const LangOptions &Opts,
                                   MacroBuilder &Builder) const {

Is it correct? Could you please review it?

Thanks,
- D.

2012/7/11 Dmitry N. Mikushin <maemarcus at gmail.com>

> Yes, sure, good idea, because it might also be Clang-related.
>
> http://llvm.org/bugs/show_bug.cgi?id=13322
>
>
> 2012/7/11 Chad Rosier <mcrosier at apple.com>
>
>> Dmitry,
>> You might be better served by filing this as a bug (http://llvm.org/bugs/).
>>  Please include a test case and the steps to reproduce (i.e., what you've
>> provided below).
>>
>>  Chad
>>
>> On Jul 10, 2012, at 3:15 PM, Dmitry N. Mikushin wrote:
>>
>> Hi,
>>
>> Looks like "{" and "}" are lost when trying to use the combination of
>> Clang and NVPTX, which may result in a clash between the function-scope
>> and asm-scope definitions. Here is an example:
>>
>> > cat test.cu
>> __attribute__((device)) __attribute__((nv_linkonce_odr)) __inline__ int
>> __any(int a) {
>>     int result;
>>     asm __volatile__ ("{ \n\t"
>>         ".reg .pred \t%%p1; \n\t"
>>         ".reg .pred \t%%p2; \n\t"
>>         "setp.ne.u32 \t%%p1, %1, 0; \n\t"
>>         "vote.any.pred \t%%p2, %%p1; \n\t"
>>         "selp.s32 \t%0, 1, 0, %%p2; \n\t"
>>         "}" : "=r"(result) : "r"(a));
>>     return result;
>> }
>>
>> > clang -cc1 -emit-llvm -fcuda-is-device -triple ptx64-unknown-unknown
>> test.cu -o test.ll
>> > cat test.ll
>> ; ModuleID = 'test.cu'
>> target datalayout = "e-p:64:64-i64:64:64-f64:64:64-n1:8:16:32:64"
>> target triple = "ptx64-unknown-unknown"
>>
>> define ptx_device i32 @_Z5__anyi(i32 %a) nounwind inlinehint {
>> entry:
>>   %a.addr = alloca i32, align 4
>>   %result = alloca i32, align 4
>>   store i32 %a, i32* %a.addr, align 4
>>   %0 = load i32* %a.addr, align 4
>>   %1 = call i32 asm sideeffect "$( \0A\09.reg .pred \09%p1; \0A\09.reg
>> .pred \09%p2; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.any.pred \09%p2,
>> %p1; \0A\09selp.s32 \09$0, 1, 0, %p2; \0A\09$)", "=r,r"(i32 %0) nounwind,
>> !srcloc !0
>>   store i32 %1, i32* %result, align 4
>>   %2 = load i32* %result, align 4
>>   ret i32 %2
>> }
>>
>> !0 = metadata !{i32 127, i32 132, i32 166, i32 200, i32 242, i32 285, i32
>> 327}
>>
>> > llc -march=nvptx64 test.ll -o test.ptx
>> > cat test.ptx
>> //
>> // Generated by LLVM NVPTX Back-End
>> //
>>
>> .version 3.0
>> .target sm_10, texmode_independent
>> .address_size 64
>>
>>
>>     // .globl    _Z5__anyi
>> .visible .global .align 4 .b8     __local_depot0[8];
>>
>> .func  (.reg .b32 func_retval0) _Z5__anyi(
>>     .reg .b32 _Z5__anyi_param_0
>> )                                       // @_Z5__anyi
>> {
>>     .reg .b64     %SP;
>>     .reg .b64     %SPL;
>>     .reg .pred %p<396>;
>>     .reg .s16 %rc<396>;
>>     .reg .s16 %rs<396>;
>>     .reg .s32 %r<396>;
>>     .reg .s64 %rl<396>;
>>     .reg .f32 %f<396>;
>>     .reg .f64 %fl<396>;
>>
>> // BB#0:                                // %entry
>>     mov.u64     %SP, __local_depot0;
>>     mov.b32    %r0, _Z5__anyi_param_0;
>>     st.global.u32     [%SP+0], %r0;
>>     // inline asm
>>
>>     .reg .pred     %p1;
>>     .reg .pred     %p2;
>>     setp.ne.u32     %p1, %r0, 0;
>>     vote.any.pred     %p2, %p1;
>>     selp.s32     %r0, 1, 0, %p2;
>>
>>     // inline asm
>>     st.global.u32     [%SP+4], %r0;
>>     mov.b32    func_retval0, %r0;
>>     ret;
>> }
>>
>> > ptxas test.ptx -o test.cubin
>> ptxas test.ptx, line 33; error   : Duplicate definition of variable '%p1'
>> ptxas test.ptx, line 34; error   : Duplicate definition of variable '%p2'
>> ptxas test.ptx, line 36; error   : Instruction 'vote' requires .target
>> sm_12 or higher
>> ptxas fatal   : Ptx assembly aborted due to errors
>>
>> - D.
>>
>> _______________________________________________
>> LLVM Developers mailing list
>> LLVMdev at cs.uiuc.edu         http://llvm.cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/llvmdev
>>
>>
>>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20120711/8a05b0a6/attachment.html>


More information about the llvm-dev mailing list