[PATCH] [RFC] Patch for Bug 18808 - clang -O3 -flto drops Target feature flags

David Blaikie dblaikie at gmail.com
Thu Mar 26 16:19:13 PDT 2015


Eric - this sounds related to your recent work. I assume we should be
producing the same asm, but not by passing any flags to the linker, instead
by using function attributes?
On Mar 26, 2015 3:44 AM, "suyog" <sardask01 at gmail.com> wrote:

> Hi void, dexonsmith, samsonov, rafael,
>
> This is regarding bug 18808 where target feature was set at the front end,
> but not passed to the linker when using -flto flag (Gold linker)
>
> The option string  "-plugin-opt=-mattr=feature" was not appended in
> command generated for linker.
>
> Approach for this patch :
>
> In lib/tools/clang/lib/Driver/Tools.cpp file
>
> 1. Add a new function getTargetFeatureList() to return the list of target
> features
> 2. Modify the getTargetFeature() function to set target-feature separately
> in clang options
> 3. In the AddGoldPlugin() function, retrieve the list of target features
> and append to "-plugin-opt=-mattr=". Multiple features are appended
> separated by comma.
>
> No make-check regression observed with the patch.
>
> Test case mentioned in 18808 :
>
> $ cat test.c
>
> #include <stdint.h>
> #include <stdlib.h>
>
> typedef uint64_t avxreg __attribute__((ext_vector_type(4)));
>
> int main(int argc, char **argv) {
>         avxreg x=argc,y=argc;
>         for (int i=0; i<argc; i++) {
>                 int foo = atoi(argv[i]);
>                 x = y + (avxreg)foo;
>                 y = x;
>         }
>         return x.x;
> }
>
>
> Assembly generated before patch :
>
> $ clang -O3 -flto -mavx2 test.c -o test
> $ objdump -d test > 1.s
> $ cat 1.s
>
> 00000000004005d0 <main>:
>   4005d0:       55                      push   %rbp
>   4005d1:       53                      push   %rbx
>   4005d2:       48 83 ec 28             sub    $0x28,%rsp
>   4005d6:       48 89 f3                mov    %rsi,%rbx
>   4005d9:       89 fd                   mov    %edi,%ebp
>   4005db:       48 63 c5                movslq %ebp,%rax
>   4005de:       66 48 0f 6e c0          movq   %rax,%xmm0
>   4005e3:       66 0f 70 c0 44          pshufd $0x44,%xmm0,%xmm0
>   4005e8:       85 c0                   test   %eax,%eax
>   4005ea:       7e 51                   jle    40063d <main+0x6d>
>   4005ec:       66 0f 6f d0             movdqa %xmm0,%xmm2
>   4005f0:       66 0f 7f 14 24          movdqa %xmm2,(%rsp)
>   4005f5:       66 0f 7f 44 24 10       movdqa %xmm0,0x10(%rsp)
>   4005fb:       48 8b 3b                mov    (%rbx),%rdi
>   4005fe:       31 f6                   xor    %esi,%esi
>   400600:       ba 0a 00 00 00          mov    $0xa,%edx
>   400605:       e8 c6 fe ff ff          callq  4004d0 <strtol at plt>
>   40060a:       66 0f 6f 14 24          movdqa (%rsp),%xmm2
>   40060f:       48 98                   cltq
>   400611:       66 48 0f 6e c0          movq   %rax,%xmm0
>   400616:       66 0f 70 c0 44          pshufd $0x44,%xmm0,%xmm0
>   40061b:       66 0f d4 d0             paddq  %xmm0,%xmm2
>   40061f:       66 0f 6f 4c 24 10       movdqa 0x10(%rsp),%xmm1
>   400625:       66 0f d4 c8             paddq  %xmm0,%xmm1
>   400629:       66 0f 7f 4c 24 10       movdqa %xmm1,0x10(%rsp)
>   40062f:       66 0f 6f 44 24 10       movdqa 0x10(%rsp),%xmm0
>   400635:       48 83 c3 08             add    $0x8,%rbx
>   400639:       ff cd                   dec    %ebp
>   40063b:       75 b3                   jne    4005f0 <main+0x20>
>   40063d:       66 48 0f 7e c0          movq   %xmm0,%rax
>   400642:       48 83 c4 28             add    $0x28,%rsp
>   400646:       5b                      pop    %rbx
>   400647:       5d                      pop    %rbp
>   400648:       c3                      retq
>   400649:       0f 1f 80 00 00 00 00    nopl   0x0(%rax)
>
>
> Assembly generated after patch :
>
> $ clang -O3 -flto -mavx2 test.c -o test
> $ objdump -d test > 2.s
> $ cat 2.s
>
> 0000000004005d0 <main>:
>   4005d0:       55                      push   %rbp
>   4005d1:       53                      push   %rbx
>   4005d2:       48 83 ec 28             sub    $0x28,%rsp
>   4005d6:       48 89 f3                mov    %rsi,%rbx
>   4005d9:       89 fd                   mov    %edi,%ebp
>   4005db:       48 63 c5                movslq %ebp,%rax
>   4005de:       c4 e1 f9 6e c0          vmovq  %rax,%xmm0
>   4005e3:       c4 e2 7d 19 c0          vbroadcastsd %xmm0,%ymm0
>   4005e8:       85 c0                   test   %eax,%eax
>   4005ea:       7e 42                   jle    40062e <main+0x5e>
>   4005ec:       0f 1f 40 00             nopl   0x0(%rax)
>   4005f0:       c5 fc 11 04 24          vmovups %ymm0,(%rsp)
>   4005f5:       48 8b 3b                mov    (%rbx),%rdi
>   4005f8:       31 f6                   xor    %esi,%esi
>   4005fa:       ba 0a 00 00 00          mov    $0xa,%edx
>   4005ff:       c5 f8 77                vzeroupper
>   400602:       e8 c9 fe ff ff          callq  4004d0 <strtol at plt>
>   400607:       48 98                   cltq
>   400609:       c4 e1 f9 6e c0          vmovq  %rax,%xmm0
>   40060e:       c4 e2 7d 59 c0          vpbroadcastq %xmm0,%ymm0
>   400613:       c5 fe 6f 0c 24          vmovdqu (%rsp),%ymm1
>   400618:       c5 fd d4 c9             vpaddq %ymm1,%ymm0,%ymm1
>   40061c:       c5 fe 7f 0c 24          vmovdqu %ymm1,(%rsp)
>   400621:       c5 fc 10 04 24          vmovups (%rsp),%ymm0
>   400626:       48 83 c3 08             add    $0x8,%rbx
>   40062a:       ff cd                   dec    %ebp
>   40062c:       75 c2                   jne    4005f0 <main+0x20>
>   40062e:       c4 e1 f9 7e c0          vmovq  %xmm0,%rax
>   400633:       48 83 c4 28             add    $0x28,%rsp
>   400637:       5b                      pop    %rbx
>   400638:       5d                      pop    %rbp
>   400639:       c5 f8 77                vzeroupper
>   40063c:       c3                      retq
>   40063d:       0f 1f 00                nopl   (%rax)
>
>
> Visible clearly from above that avx instructions are generated after
> applying the patch.
>
> Can some one please help in reviewing this patch?
> Also, can someone help in forming a test case for this patch? I couldn't
> figure out how to write test case for checking the generation of avx
> instructions.
>
> Regards,
> Suyog
>
> REPOSITORY
>   rL LLVM
>
> http://reviews.llvm.org/D8629
>
> Files:
>   lib/Driver/Tools.cpp
>
> Index: lib/Driver/Tools.cpp
> ===================================================================
> --- lib/Driver/Tools.cpp
> +++ lib/Driver/Tools.cpp
> @@ -1544,6 +1544,10 @@
>    }
>  }
>
> +static void getTargetFeatureList(const Driver &D, const llvm::Triple
> &Triple,
> +                                 const ArgList &Args,
> +                                 std::vector<const char *> &Names, bool
> ForAS);
> +
>  static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,
>                            ArgStringList &CmdArgs) {
>    // Tell the linker to load the plugin. This has to come before
> AddLinkerInputs
> @@ -1560,6 +1564,20 @@
>    std::string CPU = getCPUName(Args, ToolChain.getTriple());
>    if (!CPU.empty())
>      CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") +
> CPU));
> +
> +  std::vector<const char *> Names;
> +  getTargetFeatureList(ToolChain.getDriver(), ToolChain.getTriple(), Args,
> +                       Names, false);
> +
> +  if (!Names.empty()) {
> +    std::string List;
> +    for (unsigned i = 0; i < Names.size() - 1; ++i) {
> +      List.append(Names[i]);
> +      List.append(",");
> +    }
> +    List.append(Names[Names.size() - 1]);
> +    CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=-mattr=") +
> List));
> +  }
>  }
>
>  static void getX86TargetFeatures(const Driver & D,
> @@ -1855,10 +1873,11 @@
>    }
>  }
>
> -static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
> -                              const ArgList &Args, ArgStringList &CmdArgs,
> -                              bool ForAS) {
> +static void getTargetFeatureList(const Driver &D, const llvm::Triple
> &Triple,
> +                                 const ArgList &Args,
> +                                 std::vector<const char *> &Names, bool
> ForAS) {
>    std::vector<const char *> Features;
> +
>    switch (Triple.getArch()) {
>    default:
>      break;
> @@ -1911,9 +1930,19 @@
>      unsigned Last = LastI->second;
>      if (Last != I)
>        continue;
> +    Names.push_back(Name);
> +  }
> +}
> +
> +static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,
> +                              const ArgList &Args, ArgStringList &CmdArgs,
> +                              bool ForAs) {
>
> +  std::vector<const char *> Names;
> +  getTargetFeatureList(D, Triple, Args, Names, ForAs);
> +  for (unsigned i = 0; i < Names.size(); i++) {
>      CmdArgs.push_back("-target-feature");
> -    CmdArgs.push_back(Name);
> +    CmdArgs.push_back(Names[i]);
>    }
>  }
>
> EMAIL PREFERENCES
>   http://reviews.llvm.org/settings/panel/emailpreferences/
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
>
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20150326/34cb22fe/attachment.html>


More information about the cfe-commits mailing list