[PATCH] [RFC] Patch for Bug 18808 - clang -O3 -flto drops Target feature flags

Eric Christopher echristo at gmail.com
Thu Mar 26 16:28:53 PDT 2015


Very much so. Right now, this already works without passing any flags to the linker:

dzur:~/tmp> cat bar.c
#include <stdint.h>
#include <stdlib.h>

typedef uint64_t avxreg __attribute__((ext_vector_type(4)));

int main(int argc, char **argv) {
        avxreg x=argc,y=argc;
        for (int i=0; i<argc; i++) {
                int foo = atoi(argv[i]);
                x = y + (avxreg)foo;
                y = x;
        }
        return x.x;
}
dzur:~/tmp> ~/builds/build-llvm/Debug+Asserts/bin/clang -O3 -flto -mavx2 bar.c
dzur:~/tmp> llvm-objdump -disassemble a.out | more

main:
  400520:       55      pushq   %rbp
  400521:       53      pushq   %rbx
  400522:       48 83 ec 28     subq    $40, %rsp
  400526:       48 89 f3        movq    %rsi, %rbx
  400529:       89 fd   movl    %edi, %ebp
  40052b:       48 63 c5        movslq  %ebp, %rax
  40052e:       c4 e1 f9 6e c0  vmovq   %rax, %xmm0
  400533:       c4 e2 7d 19 c0  vbroadcastsd    %xmm0, %ymm0
  400538:       85 c0   testl   %eax, %eax
  40053a:       7e 42   jle     66
  40053c:       0f 1f 40 00     nopl    (%rax)
  400540:       c5 fc 11 04 24  vmovups %ymm0, (%rsp)
  400545:       48 8b 3b        movq    (%rbx), %rdi
  400548:       31 f6   xorl    %esi, %esi
  40054a:       ba 0a 00 00 00  movl    $10, %edx
  40054f:       c5 f8 77        vzeroupper
  400552:       e8 b9 fe ff ff  callq   -327
  400557:       48 98   cltq
  400559:       c4 e1 f9 6e c0  vmovq   %rax, %xmm0
  40055e:       c4 e2 7d 59 c0  vpbroadcastq    %xmm0, %ymm0
  400563:       c5 fe 6f 0c 24  vmovdqu (%rsp), %ymm1
  400568:       c5 fd d4 c9     vpaddq  %ymm1, %ymm0, %ymm1
  40056c:       c5 fe 7f 0c 24  vmovdqu %ymm1, (%rsp)
  400571:       c5 fc 10 04 24  vmovups (%rsp), %ymm0
  400576:       48 83 c3 08     addq    $8, %rbx
  40057a:       ff cd   decl    %ebp
  40057c:       75 c2   jne     -62
  40057e:       c4 e1 f9 7e c0  vmovq   %xmm0, %rax
  400583:       48 83 c4 28     addq    $40, %rsp
  400587:       5b      popq    %rbx
  400588:       5d      popq    %rbp
  400589:       c5 f8 77        vzeroupper
  40058c:       c3      retq
  40058d:       0f 1f 00        nopl    (%rax)

which should be precisely what's expected.

If you look at the IR generated from clang:

dzur:~/tmp> ~/builds/build-llvm/Debug+Asserts/bin/clang -O3 -flto -mavx2 bar.c -S -o -
; ModuleID = 'bar.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {
entry:
  %conv = sext i32 %argc to i64
  %splat.splatinsert = insertelement <4 x i64> undef, i64 %conv, i32 0
  %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer
  %cmp14 = icmp sgt i32 %argc, 0
  br i1 %cmp14, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  %y.015 = phi <4 x i64> [ %add, %for.body ], [ %splat.splat, %for.body.preheader ]
  %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 %indvars.iv
  %0 = load i8*, i8** %arrayidx, align 8, !tbaa !1
  %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #2
  %sext = shl i64 %call.i, 32
  %conv5 = ashr exact i64 %sext, 32
  %splat.splatinsert6 = insertelement <4 x i64> undef, i64 %conv5, i32 0
  %splat.splat7 = shufflevector <4 x i64> %splat.splatinsert6, <4 x i64> undef, <4 x i32> zeroinitializer
  %add = add <4 x i64> %splat.splat7, %y.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %argc
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  %add.lcssa = phi <4 x i64> [ %add, %for.body ]
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  %y.0.lcssa = phi <4 x i64> [ %splat.splat, %entry ], [ %add.lcssa, %for.end.loopexit ]
  %1 = extractelement <4 x i64> %y.0.lcssa, i64 0
  %conv8 = trunc i64 %1 to i32
  ret i32 %conv8
}

; Function Attrs: nounwind
declare i64 @strtol(i8* readonly, i8** nocapture, i32) #1

attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"target-cpu"="x86-64"
"target-features"="+sse4.2,+avx2,+ssse3,+sse3,+sse,+sse2,+sse4.1,+avx,+popcnt"
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind "less-precise-fpmad"="false"
"no-frame-pointer-elim"="false" "no-infs-fp-math"="false"
"no-nans-fp-math"="false" "stack-protector-buffer-size"="8"
"target-cpu"="x86-64"
"target-features"="+sse4.2,+avx2,+ssse3,+sse3,+sse,+sse2,+sse4.1,+avx,+popcnt"
"unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #2 = { nounwind }

!llvm.ident = !{!0}

!0 = !{!"clang version 3.7.0 (trunk 233238) (llvm/trunk 233240)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"any pointer", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}

You'll see that the target-features for the function are set to the
appropriate level of avx.

I'll take a look at the PR to make sure we can just close it.

-eric


On Thu, Mar 26, 2015 at 4:23 PM David Blaikie <dblaikie at gmail.com> wrote:

> Eric - this sounds related to your recent work. I assume we should be
> producing the same asm, but not by passing any flags to the linker, instead
> by using function attributes?
>
>
> REPOSITORY
>   rL LLVM
>
> http://reviews.llvm.org/D8629
>
> EMAIL PREFERENCES
>   http://reviews.llvm.org/settings/panel/emailpreferences/
>
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20150326/e83b7ba5/attachment.html>


More information about the cfe-commits mailing list