<div dir="ltr">Very much so. Right now, this works without doing this:<br><br><div>dzur:~/tmp> cat bar.c</div><div>#include <stdint.h></div><div>#include <stdlib.h></div><div><br></div><div>typedef uint64_t avxreg __attribute__((ext_vector_type(4)));</div><div><br></div><div>int main(int argc, char **argv) {</div><div> avxreg x=argc,y=argc;</div><div> for (int i=0; i<argc; i++) {</div><div> int foo = atoi(argv[i]);</div><div> x = y + (avxreg)foo;</div><div> y = x;</div><div> }</div><div> return x.x;</div><div>}</div><div>dzur:~/tmp> ~/builds/build-llvm/Debug+Asserts/bin/clang -O3 -flto -mavx2 bar.c</div><div><div>dzur:~/tmp> llvm-objdump -disassemble a.out | more</div></div><div><br></div><div><div>main:</div><div> 400520: 55 pushq %rbp</div><div> 400521: 53 pushq %rbx</div><div> 400522: 48 83 ec 28 subq $40, %rsp</div><div> 400526: 48 89 f3 movq %rsi, %rbx</div><div> 400529: 89 fd movl %edi, %ebp</div><div> 40052b: 48 63 c5 movslq %ebp, %rax</div><div> 40052e: c4 e1 f9 6e c0 vmovq %rax, %xmm0</div><div> 400533: c4 e2 7d 19 c0 vbroadcastsd %xmm0, %ymm0</div><div> 400538: 85 c0 testl %eax, %eax</div><div> 40053a: 7e 42 jle 66</div><div> 40053c: 0f 1f 40 00 nopl (%rax)</div><div> 400540: c5 fc 11 04 24 vmovups %ymm0, (%rsp)</div><div> 400545: 48 8b 3b movq (%rbx), %rdi</div><div> 400548: 31 f6 xorl %esi, %esi</div><div> 40054a: ba 0a 00 00 00 movl $10, %edx</div><div> 40054f: c5 f8 77 vzeroupper</div><div> 400552: e8 b9 fe ff ff callq -327</div><div> 400557: 48 98 cltq</div><div> 400559: c4 e1 f9 6e c0 vmovq %rax, %xmm0</div><div> 40055e: c4 e2 7d 59 c0 vpbroadcastq %xmm0, %ymm0</div><div> 400563: c5 fe 6f 0c 24 vmovdqu (%rsp), %ymm1</div><div> 400568: c5 fd d4 c9 vpaddq %ymm1, %ymm0, %ymm1</div><div> 40056c: c5 fe 7f 0c 24 vmovdqu %ymm1, (%rsp)</div><div> 400571: c5 fc 10 04 24 vmovups (%rsp), %ymm0</div><div> 400576: 48 83 c3 08 addq $8, %rbx</div><div> 40057a: ff cd decl %ebp</div><div> 40057c: 75 c2 jne -62</div><div> 40057e: c4 e1 f9 7e c0 vmovq %xmm0, 
%rax</div><div> 400583: 48 83 c4 28 addq $40, %rsp</div><div> 400587: 5b popq %rbx</div><div> 400588: 5d popq %rbp</div><div> 400589: c5 f8 77 vzeroupper</div><div> 40058c: c3 retq</div><div> 40058d: 0f 1f 00 nopl (%rax)</div></div><div><br></div><div>which should be precisely what's expected.</div><div><br></div><div>If you look at the IR generated from clang:</div><div><br></div><div><div>dzur:~/tmp> ~/builds/build-llvm/Debug+Asserts/bin/clang -O3 -flto -mavx2 bar.c -S -o -</div><div>; ModuleID = 'bar.c'</div><div>target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"</div><div>target triple = "x86_64-unknown-linux-gnu"</div><div><br></div><div>; Function Attrs: nounwind uwtable</div><div>define i32 @main(i32 %argc, i8** nocapture readonly %argv) #0 {</div><div>entry:</div><div> %conv = sext i32 %argc to i64</div><div> %splat.splatinsert = insertelement <4 x i64> undef, i64 %conv, i32 0</div><div> %splat.splat = shufflevector <4 x i64> %splat.splatinsert, <4 x i64> undef, <4 x i32> zeroinitializer</div><div> %cmp14 = icmp sgt i32 %argc, 0</div><div> br i1 %cmp14, label %for.body.preheader, label %for.end</div><div><br></div><div>for.body.preheader: ; preds = %entry</div><div> br label %for.body</div><div><br></div><div>for.body: ; preds = %for.body.preheader, %for.body</div><div> %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]</div><div> %y.015 = phi <4 x i64> [ %add, %for.body ], [ %splat.splat, %for.body.preheader ]</div><div> %arrayidx = getelementptr inbounds i8*, i8** %argv, i64 %indvars.iv</div><div> %0 = load i8*, i8** %arrayidx, align 8, !tbaa !1</div><div> %call.i = tail call i64 @strtol(i8* nocapture %0, i8** null, i32 10) #2</div><div> %sext = shl i64 %call.i, 32</div><div> %conv5 = ashr exact i64 %sext, 32</div><div> %splat.splatinsert6 = insertelement <4 x i64> undef, i64 %conv5, i32 0</div><div> %splat.splat7 = shufflevector <4 x i64> %splat.splatinsert6, <4 x i64> undef, <4 x i32> zeroinitializer</div><div> 
%add = add <4 x i64> %splat.splat7, %y.015</div><div> %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1</div><div> %lftr.wideiv = trunc i64 %indvars.iv.next to i32</div><div> %exitcond = icmp eq i32 %lftr.wideiv, %argc</div><div> br i1 %exitcond, label %for.end.loopexit, label %for.body</div><div><br></div><div>for.end.loopexit: ; preds = %for.body</div><div> %add.lcssa = phi <4 x i64> [ %add, %for.body ]</div><div> br label %for.end</div><div><br></div><div>for.end: ; preds = %for.end.loopexit, %entry</div><div> %y.0.lcssa = phi <4 x i64> [ %splat.splat, %entry ], [ %add.lcssa, %for.end.loopexit ]</div><div> %1 = extractelement <4 x i64> %y.0.lcssa, i64 0</div><div> %conv8 = trunc i64 %1 to i32</div><div> ret i32 %conv8</div><div>}</div><div><br></div><div>; Function Attrs: nounwind</div><div>declare i64 @strtol(i8* readonly, i8** nocapture, i32) #1</div><div><br></div><div>attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse4.2,+avx2,+ssse3,+sse3,+sse,+sse2,+sse4.1,+avx,+popcnt" "unsafe-fp-math"="false" "use-soft-float"="false" }</div><div>attributes #1 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse4.2,+avx2,+ssse3,+sse3,+sse,+sse2,+sse4.1,+avx,+popcnt" "unsafe-fp-math"="false" "use-soft-float"="false" }</div><div>attributes #2 = { nounwind }</div><div><br></div><div>!llvm.ident = !{!0}</div><div><br></div><div>!0 = !{!"clang version 3.7.0 (trunk 233238) (llvm/trunk 233240)"}</div><div>!1 = !{!2, !2, i64 0}</div><div>!2 = !{!"any pointer", !3, i64 0}</div><div>!3 = !{!"omnipotent char", !4, i64 0}</div><div>!4 = !{!"Simple C/C++ TBAA"}</div></div><div><br></div><div>You'll see that the target-features for the function are set to the 
appropriate level of avx.</div><div><br></div><div>I'll take a look at the PR to make sure we can just close it.</div><div><br></div><div>-eric</div><div><br></div></div><br><div class="gmail_quote">On Thu, Mar 26, 2015 at 4:23 PM David Blaikie &lt;<a href="mailto:dblaikie@gmail.com">dblaikie@gmail.com</a>&gt; wrote:<br><blockquote class="gmail_quote" style="margin:0 0 0 .8ex;border-left:1px #ccc solid;padding-left:1ex">Eric - this sounds related to your recent work. I assume we should be<br>
producing the same asm, but not by passing any flags to the linker, instead<br>
by using function attributes?<br>
<br>
<br>
REPOSITORY<br>
rL LLVM<br>
<br>
<a href="http://reviews.llvm.org/D8629" target="_blank">http://reviews.llvm.org/D8629</a><br>
<br>
EMAIL PREFERENCES<br>
<a href="http://reviews.llvm.org/settings/panel/emailpreferences/" target="_blank">http://reviews.llvm.org/settings/panel/emailpreferences/</a><br>
<br>
<br>
<br>
_______________________________________________<br>
cfe-commits mailing list<br>
<a href="mailto:cfe-commits@cs.uiuc.edu" target="_blank">cfe-commits@cs.uiuc.edu</a><br>
<a href="http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits" target="_blank">http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits</a><br>
</blockquote></div>