[llvm-dev] [EXTERNAL] Re: ORC JIT error when using AVX2 vector instructions

Stefan Gränitz via llvm-dev llvm-dev at lists.llvm.org
Mon Aug 30 14:00:45 PDT 2021


Hi Frank

> That makes me think that the ORC JIT Kaleidoscope doesn't use the
> '+avx2' attribute.
>
> How can ORC JIT Kaleidoscope generate jitted code with AVX2 instructions?
Did you try adding something like this:
JTMB.addFeatures({"+avx2"});

at this point in the Kaleidoscope example?
https://github.com/llvm/llvm-project/blob/7a2a765745973ebeb041276d2d9489a000ba9371/llvm/examples/Kaleidoscope/BuildingAJIT/Chapter1/KaleidoscopeJIT.h#L71

Hope it helps.
Best, Stefan

On 30/08/2021 21:46, Frank Winter via llvm-dev wrote:
> Thanks! Yeah, that was my silliness. Fixed and the module compiles now
> with ORC JIT Kaleidoscope.
>
> However, looking at the assembler I only see SSE (128 bit vectors)
> being generated:
>
> .Leval0_intern:
> .cfi_startproc
> addl %esi, %edi
> shll $3, %edi
> movslq %edi, %rax
> shlq $5, %rax
> movaps (%r8,%rax), %xmm0
> movaps 16(%r8,%rax), %xmm1
> mulps 16(%rcx,%rax), %xmm1
> mulps (%rcx,%rax), %xmm0
> movaps %xmm0, (%rdx,%rax)
> movaps %xmm1, 16(%rdx,%rax)
> retq
>
> I cross checked what LLC gives:
>
> Calling llc with no optional flags gives matching assembly, but when
> adding '-mattr=+avx2' I get AVX2 (256-bit vectors):
>
> .Leval0_intern:                         # @eval0_intern
>         .cfi_startproc
> # %bb.0:                                # %stack
>         addl    %esi, %edi
>         shll    $3, %edi
>         movslq  %edi, %rax
>         shlq    $5, %rax
>         vmovaps (%r8,%rax), %ymm0
>         vmulps  (%rcx,%rax), %ymm0, %ymm0
>         vmovaps %ymm0, (%rdx,%rax)
>         vzeroupper
>         retq
>
> That makes me think that the ORC JIT Kaleidoscope doesn't use the
> '+avx2' attribute.
>
> How can ORC JIT Kaleidoscope generate jitted code with AVX2 instructions?
>
> Thanks again & Best wishes,
> Frank
>
>
> ------------------------------------------------------------------------
> *From:* Craig Topper <craig.topper at gmail.com>
> *Sent:* Monday, August 30, 2021 3:20 PM
> *To:* Frank Winter <fwinter at jlab.org>
> *Cc:* llvm-dev at lists.llvm.org <llvm-dev at lists.llvm.org>
> *Subject:* [EXTERNAL] Re: [llvm-dev] ORC JIT error when using AVX2
> vector instructions
>  
> This is an illegal instruction. mul is an integer operation, but here
> its operands have floating-point types. The correct operation would be fmul.
>
> %21 = mul <8 x float> %20, %10
>
> ~Craig
>
>
> On Mon, Aug 30, 2021 at 12:08 PM Frank Winter via llvm-dev
> <llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>> wrote:
>
>     Hi.
>
>     As soon as the module contains instructions operating on < 8 x
>     float > the ORC JIT refuses to work.
>
>     Here's the module that provokes the error given further below:
>
>     ; ModuleID = 'module'
>     source_filename = "module"
>     target datalayout =
>     "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
>
>     define private void @eval0_intern(i32 %arg0, i32 %arg1, <8 x
>     float>* %arg2, <8 x float>* %arg3, <8 x float>* %arg4) {
>     stack:
>       br label %afterstack
>
>     afterstack:                                       ; preds = %stack
>       %0 = add nsw i32 %arg0, %arg1
>       %1 = add nsw i32 0, %0
>       %2 = mul i32 %1, 1
>       %3 = add nsw i32 %2, 0
>       %4 = mul i32 %3, 1
>       %5 = add nsw i32 %4, 0
>       %6 = mul i32 %5, 1
>       %7 = add nsw i32 %6, 0
>       %8 = mul i32 %7, 8
>       %9 = getelementptr <8 x float>, <8 x float>* %arg3, i32 %8
>       %10 = load <8 x float>, <8 x float>* %9, align 32
>       %11 = add nsw i32 0, %0
>       %12 = mul i32 %11, 1
>       %13 = add nsw i32 %12, 0
>       %14 = mul i32 %13, 1
>       %15 = add nsw i32 %14, 0
>       %16 = mul i32 %15, 1
>       %17 = add nsw i32 %16, 0
>       %18 = mul i32 %17, 8
>       %19 = getelementptr <8 x float>, <8 x float>* %arg4, i32 %18
>       %20 = load <8 x float>, <8 x float>* %19, align 32
>       %21 = mul <8 x float> %20, %10
>       %22 = add nsw i32 0, %0
>       %23 = mul i32 %22, 1
>       %24 = add nsw i32 %23, 0
>       %25 = mul i32 %24, 1
>       %26 = add nsw i32 %25, 0
>       %27 = mul i32 %26, 1
>       %28 = add nsw i32 %27, 0
>       %29 = mul i32 %28, 8
>       %30 = getelementptr <8 x float>, <8 x float>* %arg2, i32 %29
>       store <8 x float> %21, <8 x float>* %30, align 32
>       ret void
>     }
>
>     define void @eval0(i32 %idx, [8 x i8]* %arg_ptr) {
>     entrypoint:
>       %0 = getelementptr [8 x i8], [8 x i8]* %arg_ptr, i32 0
>       %1 = bitcast [8 x i8]* %0 to i32*
>       %2 = load i32, i32* %1, align 4
>       %3 = getelementptr [8 x i8], [8 x i8]* %arg_ptr, i32 1
>       %4 = bitcast [8 x i8]* %3 to <8 x float>**
>       %5 = load <8 x float>*, <8 x float>** %4, align 8
>       %6 = getelementptr [8 x i8], [8 x i8]* %arg_ptr, i32 2
>       %7 = bitcast [8 x i8]* %6 to <8 x float>**
>       %8 = load <8 x float>*, <8 x float>** %7, align 8
>       %9 = getelementptr [8 x i8], [8 x i8]* %arg_ptr, i32 3
>       %10 = bitcast [8 x i8]* %9 to <8 x float>**
>       %11 = load <8 x float>*, <8 x float>** %10, align 8
>       call void @eval0_intern(i32 %idx, i32 %2, <8 x float>* %5, <8 x
>     float>* %8, <8 x float>* %11)
>       ret void
>     }
>     --------------------------
>
>
>     For the JIT part I'm using the Kaleidoscope ORC JIT as given in
>     the LLVM examples. However, when it comes to the symbol lookup the
>     program stops with output like this:
>
>     Lookup
>     LLVM ERROR: Cannot select: 0x562e8bb6c268: v4f32 = mul
>     0x562e8bb6bab0, 0x562e8bb6b6a0
>       0x562e8bb6bab0: v4f32,ch = load<(load 16 from %ir.19 + 16,
>     basealign 32)> 0x562e8baf8ca8, 0x562e8bb6c130, undef:i64
>         0x562e8bb6c130: i64 = add nuw 0x562e8bb6bcb8, Constant:i64<16>
>           0x562e8bb6bcb8: i64 = add 0x562e8bb6bc50, 0x562e8bb6b9e0
>             0x562e8bb6bc50: i64,ch = CopyFromReg 0x562e8baf8ca8,
>     Register:i64 %4
>               0x562e8bb6bbe8: i64 = Register %4
>             0x562e8bb6b9e0: i64 = shl 0x562e8bb6b910, Constant:i8<5>
>               0x562e8bb6b910: i64 = sign_extend 0x562e8bb6b770
>                 0x562e8bb6b770: i32 = shl 0x562e8bb6b500, Constant:i8<3>
>                   0x562e8bb6b500: i32 = add nsw 0x562e8bb6b3c8,
>     0x562e8bb6b498
>                     0x562e8bb6b3c8: i32,ch = CopyFromReg
>     0x562e8baf8ca8, Register:i32 %0
>                       0x562e8bb6b360: i32 = Register %0
>                     0x562e8bb6b498: i32,ch = CopyFromReg
>     0x562e8baf8ca8, Register:i32 %1
>                       0x562e8bb6b430: i32 = Register %1
>                   0x562e8bb6ea28: i8 = Constant<3>
>               0x562e8bb6c2d0: i8 = Constant<5>
>           0x562e8bb6b638: i64 = Constant<16>
>         0x562e8bb6bb18: i64 = undef
>       0x562e8bb6b6a0: v4f32,ch = load<(load 16 from %ir.9 + 16,
>     basealign 32)> 0x562e8baf8ca8, 0x562e8bb6c198, undef:i64
>         0x562e8bb6c198: i64 = add nuw 0x562e8bb6ba48, Constant:i64<16>
>           0x562e8bb6ba48: i64 = add 0x562e8bb6b8a8, 0x562e8bb6b9e0
>             0x562e8bb6b8a8: i64,ch = CopyFromReg 0x562e8baf8ca8,
>     Register:i64 %3
>               0x562e8bb6b840: i64 = Register %3
>             0x562e8bb6b9e0: i64 = shl 0x562e8bb6b910, Constant:i8<5>
>               0x562e8bb6b910: i64 = sign_extend 0x562e8bb6b770
>                 0x562e8bb6b770: i32 = shl 0x562e8bb6b500, Constant:i8<3>
>                   0x562e8bb6b500: i32 = add nsw 0x562e8bb6b3c8,
>     0x562e8bb6b498
>                     0x562e8bb6b3c8: i32,ch = CopyFromReg
>     0x562e8baf8ca8, Register:i32 %0
>                       0x562e8bb6b360: i32 = Register %0
>                     0x562e8bb6b498: i32,ch = CopyFromReg
>     0x562e8baf8ca8, Register:i32 %1
>                       0x562e8bb6b430: i32 = Register %1
>                   0x562e8bb6ea28: i8 = Constant<3>
>               0x562e8bb6c2d0: i8 = Constant<5>
>           0x562e8bb6b638: i64 = Constant<16>
>         0x562e8bb6bb18: i64 = undef
>
>
>     The module compiles fine with LLC. So, I assume that's not the
>     problem.
>
>     What might go wrong? Is there a way to initialize the ORC JIT with
>     the AVX2 option somehow?
>
>     This is using LLVM release 12.
>
>     Best,
>     Frank
>
>     _______________________________________________
>     LLVM Developers mailing list
>     llvm-dev at lists.llvm.org <mailto:llvm-dev at lists.llvm.org>
>     https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
>     <https://urldefense.proofpoint.com/v2/url?u=https-3A__lists.llvm.org_cgi-2Dbin_mailman_listinfo_llvm-2Ddev&d=DwMFaQ&c=CJqEzB1piLOyyvZjb8YUQw&r=tFpAzszScTWMAFcrGFW5xg&m=iIRT39rMHzg60BQQu6bv5Nzez97Rjf-90P-EHloWvtk&s=BpFT2lRfi7rhmDGQWAkBbHAKDe_9xPQKggZyX5VciuY&e=>
>
>
> _______________________________________________
> LLVM Developers mailing list
> llvm-dev at lists.llvm.org
> https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev

-- 
https://weliveindetail.github.io/blog/about/

-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-dev/attachments/20210830/a1ba1fd1/attachment-0001.html>


More information about the llvm-dev mailing list