<p dir="ltr">Eric - this sounds related to your recent work. I assume we should be producing the same asm, but not by passing any flags to the linker, instead by using function attributes?</p>
<div class="gmail_quote">On Mar 26, 2015 3:44 AM, "suyog" <<a href="mailto:sardask01@gmail.com">sardask01@gmail.com</a>> wrote:<br type="attribution"><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Hi void, dexonsmith, samsonov, rafael,<br>
<br>
This is regarding bug 18808 where target feature was set at the front end, but not passed to the linker when using -flto flag (Gold linker)<br>
<br>
The option string "-plugin-opt=-mattr=feature" was not appended in command generated for linker.<br>
<br>
Approach for this patch :<br>
<br>
In lib/tools/clang/lib/Driver/Tools.cpp file<br>
<br>
1. Add a new function getTargetFeatureList() to return the list of target features<br>
2. Modify the getTargetFeature() function to set target-feature separately in clang options<br>
3. In the AddGoldPlugin() function, retrieve the list of target features and append to "-plugin-opt=-mattr=". Multiple features are appended separated by comma.<br>
<br>
No make-check regression observed with the patch.<br>
<br>
Test case mentioned in 18808 :<br>
<br>
$ cat test.c<br>
<br>
#include <stdint.h><br>
#include <stdlib.h><br>
<br>
typedef uint64_t avxreg __attribute__((ext_vector_type(4)));<br>
<br>
int main(int argc, char **argv) {<br>
avxreg x=argc,y=argc;<br>
for (int i=0; i<argc; i++) {<br>
int foo = atoi(argv[i]);<br>
x = y + (avxreg)foo;<br>
y = x;<br>
}<br>
return x.x;<br>
}<br>
<br>
<br>
Assembly generated before patch :<br>
<br>
$ clang -O3 -flto -mavx2 test.c -o test<br>
$ objdump -d test > 1.s<br>
$ cat 1.s<br>
<br>
00000000004005d0 <main>:<br>
4005d0: 55 push %rbp<br>
4005d1: 53 push %rbx<br>
4005d2: 48 83 ec 28 sub $0x28,%rsp<br>
4005d6: 48 89 f3 mov %rsi,%rbx<br>
4005d9: 89 fd mov %edi,%ebp<br>
4005db: 48 63 c5 movslq %ebp,%rax<br>
4005de: 66 48 0f 6e c0 movq %rax,%xmm0<br>
4005e3: 66 0f 70 c0 44 pshufd $0x44,%xmm0,%xmm0<br>
4005e8: 85 c0 test %eax,%eax<br>
4005ea: 7e 51 jle 40063d <main+0x6d><br>
4005ec: 66 0f 6f d0 movdqa %xmm0,%xmm2<br>
4005f0: 66 0f 7f 14 24 movdqa %xmm2,(%rsp)<br>
4005f5: 66 0f 7f 44 24 10 movdqa %xmm0,0x10(%rsp)<br>
4005fb: 48 8b 3b mov (%rbx),%rdi<br>
4005fe: 31 f6 xor %esi,%esi<br>
400600: ba 0a 00 00 00 mov $0xa,%edx<br>
400605: e8 c6 fe ff ff callq 4004d0 <strtol@plt><br>
40060a: 66 0f 6f 14 24 movdqa (%rsp),%xmm2<br>
40060f: 48 98 cltq<br>
400611: 66 48 0f 6e c0 movq %rax,%xmm0<br>
400616: 66 0f 70 c0 44 pshufd $0x44,%xmm0,%xmm0<br>
40061b: 66 0f d4 d0 paddq %xmm0,%xmm2<br>
40061f: 66 0f 6f 4c 24 10 movdqa 0x10(%rsp),%xmm1<br>
400625: 66 0f d4 c8 paddq %xmm0,%xmm1<br>
400629: 66 0f 7f 4c 24 10 movdqa %xmm1,0x10(%rsp)<br>
40062f: 66 0f 6f 44 24 10 movdqa 0x10(%rsp),%xmm0<br>
400635: 48 83 c3 08 add $0x8,%rbx<br>
400639: ff cd dec %ebp<br>
40063b: 75 b3 jne 4005f0 <main+0x20><br>
40063d: 66 48 0f 7e c0 movq %xmm0,%rax<br>
400642: 48 83 c4 28 add $0x28,%rsp<br>
400646: 5b pop %rbx<br>
400647: 5d pop %rbp<br>
400648: c3 retq<br>
400649: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)<br>
<br>
<br>
Assembly generated after patch :<br>
<br>
$ clang -O3 -flto -mavx2 test.c -o test<br>
$ objdump -d test > 2.s<br>
$ cat 2.s<br>
<br>
0000000004005d0 <main>:<br>
4005d0: 55 push %rbp<br>
4005d1: 53 push %rbx<br>
4005d2: 48 83 ec 28 sub $0x28,%rsp<br>
4005d6: 48 89 f3 mov %rsi,%rbx<br>
4005d9: 89 fd mov %edi,%ebp<br>
4005db: 48 63 c5 movslq %ebp,%rax<br>
4005de: c4 e1 f9 6e c0 vmovq %rax,%xmm0<br>
4005e3: c4 e2 7d 19 c0 vbroadcastsd %xmm0,%ymm0<br>
4005e8: 85 c0 test %eax,%eax<br>
4005ea: 7e 42 jle 40062e <main+0x5e><br>
4005ec: 0f 1f 40 00 nopl 0x0(%rax)<br>
4005f0: c5 fc 11 04 24 vmovups %ymm0,(%rsp)<br>
4005f5: 48 8b 3b mov (%rbx),%rdi<br>
4005f8: 31 f6 xor %esi,%esi<br>
4005fa: ba 0a 00 00 00 mov $0xa,%edx<br>
4005ff: c5 f8 77 vzeroupper<br>
400602: e8 c9 fe ff ff callq 4004d0 <strtol@plt><br>
400607: 48 98 cltq<br>
400609: c4 e1 f9 6e c0 vmovq %rax,%xmm0<br>
40060e: c4 e2 7d 59 c0 vpbroadcastq %xmm0,%ymm0<br>
400613: c5 fe 6f 0c 24 vmovdqu (%rsp),%ymm1<br>
400618: c5 fd d4 c9 vpaddq %ymm1,%ymm0,%ymm1<br>
40061c: c5 fe 7f 0c 24 vmovdqu %ymm1,(%rsp)<br>
400621: c5 fc 10 04 24 vmovups (%rsp),%ymm0<br>
400626: 48 83 c3 08 add $0x8,%rbx<br>
40062a: ff cd dec %ebp<br>
40062c: 75 c2 jne 4005f0 <main+0x20><br>
40062e: c4 e1 f9 7e c0 vmovq %xmm0,%rax<br>
400633: 48 83 c4 28 add $0x28,%rsp<br>
400637: 5b pop %rbx<br>
400638: 5d pop %rbp<br>
400639: c5 f8 77 vzeroupper<br>
40063c: c3 retq<br>
40063d: 0f 1f 00 nopl (%rax)<br>
<br>
<br>
Visible clearly from above that avx instructions are generated after applying the patch.<br>
<br>
Can some one please help in reviewing this patch?<br>
Also, can someone help in forming a test case for this patch? I couldn't figure out how to write test case for checking the generation of avx instructions.<br>
<br>
Regards,<br>
Suyog<br>
<br>
REPOSITORY<br>
rL LLVM<br>
<br>
<a href="http://reviews.llvm.org/D8629" target="_blank">http://reviews.llvm.org/D8629</a><br>
<br>
Files:<br>
lib/Driver/Tools.cpp<br>
<br>
Index: lib/Driver/Tools.cpp<br>
===================================================================<br>
--- lib/Driver/Tools.cpp<br>
+++ lib/Driver/Tools.cpp<br>
@@ -1544,6 +1544,10 @@<br>
}<br>
}<br>
<br>
+static void getTargetFeatureList(const Driver &D, const llvm::Triple &Triple,<br>
+ const ArgList &Args,<br>
+ std::vector<const char *> &Names, bool ForAS);<br>
+<br>
static void AddGoldPlugin(const ToolChain &ToolChain, const ArgList &Args,<br>
ArgStringList &CmdArgs) {<br>
// Tell the linker to load the plugin. This has to come before AddLinkerInputs<br>
@@ -1560,6 +1564,20 @@<br>
std::string CPU = getCPUName(Args, ToolChain.getTriple());<br>
if (!CPU.empty())<br>
CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=mcpu=") + CPU));<br>
+<br>
+ std::vector<const char *> Names;<br>
+ getTargetFeatureList(ToolChain.getDriver(), ToolChain.getTriple(), Args,<br>
+ Names, false);<br>
+<br>
+ if (!Names.empty()) {<br>
+ std::string List;<br>
+ for (unsigned i = 0; i < Names.size() - 1; ++i) {<br>
+ List.append(Names[i]);<br>
+ List.append(",");<br>
+ }<br>
+ List.append(Names[Names.size() - 1]);<br>
+ CmdArgs.push_back(Args.MakeArgString(Twine("-plugin-opt=-mattr=") + List));<br>
+ }<br>
}<br>
<br>
static void getX86TargetFeatures(const Driver & D,<br>
@@ -1855,10 +1873,11 @@<br>
}<br>
}<br>
<br>
-static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,<br>
- const ArgList &Args, ArgStringList &CmdArgs,<br>
- bool ForAS) {<br>
+static void getTargetFeatureList(const Driver &D, const llvm::Triple &Triple,<br>
+ const ArgList &Args,<br>
+ std::vector<const char *> &Names, bool ForAS) {<br>
std::vector<const char *> Features;<br>
+<br>
switch (Triple.getArch()) {<br>
default:<br>
break;<br>
@@ -1911,9 +1930,19 @@<br>
unsigned Last = LastI->second;<br>
if (Last != I)<br>
continue;<br>
+ Names.push_back(Name);<br>
+ }<br>
+}<br>
+<br>
+static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple,<br>
+ const ArgList &Args, ArgStringList &CmdArgs,<br>
+ bool ForAs) {<br>
<br>
+ std::vector<const char *> Names;<br>
+ getTargetFeatureList(D, Triple, Args, Names, ForAs);<br>
+ for (unsigned i = 0; i < Names.size(); i++) {<br>
CmdArgs.push_back("-target-feature");<br>
- CmdArgs.push_back(Name);<br>
+ CmdArgs.push_back(Names[i]);<br>
}<br>
}<br>
<br>
EMAIL PREFERENCES<br>
<a href="http://reviews.llvm.org/settings/panel/emailpreferences/" target="_blank">http://reviews.llvm.org/settings/panel/emailpreferences/</a><br>
<br>_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@cs.uiuc.edu">cfe-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits</a><br>
<br></blockquote></div>