[llvm-commits] [llvm] r105674 - /llvm/trunk/lib/Target/X86/README-X86-64.txt
Eli Friedman
eli.friedman at gmail.com
Tue Jun 8 19:43:17 PDT 2010
Author: efriedma
Date: Tue Jun 8 21:43:17 2010
New Revision: 105674
URL: http://llvm.org/viewvc/llvm-project?rev=105674&view=rev
Log:
A few new x86-64 specific README entries.
Modified:
llvm/trunk/lib/Target/X86/README-X86-64.txt
Modified: llvm/trunk/lib/Target/X86/README-X86-64.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README-X86-64.txt?rev=105674&r1=105673&r2=105674&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/README-X86-64.txt (original)
+++ llvm/trunk/lib/Target/X86/README-X86-64.txt Tue Jun 8 21:43:17 2010
@@ -74,6 +74,15 @@
movq %rax, (%rdx)
ret
+And the codegen is even worse for the following
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=33103):
+ void fill1(char *s, int a)
+ {
+ __builtin_memset(s, a, 15);
+ }
+
+For this version, we duplicate the computation of the constant to store.
+
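+The "constant to store" is the fill value splatted across all the
+bytes of a wide register.  As a hedged C sketch (the multiply below is
+the standard byte-broadcast idiom; the function name is illustrative):
+
+ /* Broadcast the low byte of a into all eight bytes of a 64-bit
+    value.  A 15-byte memset can then be done as an 8-byte, a 4-byte,
+    a 2-byte and a 1-byte store, all reusing truncations of this one
+    value instead of recomputing it per store width. */
+ unsigned long long splat8(int a)
+ {
+   return (unsigned char)a * 0x0101010101010101ULL;
+ }
+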
//===---------------------------------------------------------------------===//
It's not possible to reference AH, BH, CH, and DH registers in an instruction
@@ -158,3 +167,76 @@
if we have whole-function selectiondags.
//===---------------------------------------------------------------------===//
+
+Take the following C code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43640):
+
+struct u1
+{
+ float x;
+ float y;
+};
+
+float foo(struct u1 u)
+{
+ return u.x + u.y;
+}
+
+This optimizes to the following IR:
+define float @foo(double %u.0) nounwind readnone {
+entry:
+ %tmp8 = bitcast double %u.0 to i64 ; <i64> [#uses=2]
+ %tmp6 = trunc i64 %tmp8 to i32 ; <i32> [#uses=1]
+ %tmp7 = bitcast i32 %tmp6 to float ; <float> [#uses=1]
+ %tmp2 = lshr i64 %tmp8, 32 ; <i64> [#uses=1]
+ %tmp3 = trunc i64 %tmp2 to i32 ; <i32> [#uses=1]
+ %tmp4 = bitcast i32 %tmp3 to float ; <float> [#uses=1]
+ %0 = fadd float %tmp7, %tmp4 ; <float> [#uses=1]
+ ret float %0
+}
+
+And the current llvm-gcc/clang output:
+ movd %xmm0, %rax
+ movd %eax, %xmm1
+ shrq $32, %rax
+ movd %eax, %xmm0
+ addss %xmm1, %xmm0
+ ret
+
+We really shouldn't move the floats to RAX only to move them straight
+back to the XMM registers.
+
+There really isn't any good way to handle this purely in IR optimizers;
+it could possibly be handled by changing the output of the frontend,
+though. It would also be feasible to add an x86-specific DAGCombine to
+optimize the bitcast+trunc+(lshr+)bitcast combination.
+
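+For illustration, the combine would need to recognize that the IR
+above is equivalent to the following vector form, which never leaves
+the XMM registers (a hand-written sketch, not the output of any tool):
+
+define float @foo(double %u.0) nounwind readnone {
+entry:
+ %vec = bitcast double %u.0 to <2 x float>
+ %lo = extractelement <2 x float> %vec, i32 0
+ %hi = extractelement <2 x float> %vec, i32 1
+ %sum = fadd float %lo, %hi
+ ret float %sum
+}
+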
+//===---------------------------------------------------------------------===//
+
+Take the following code
+(from http://gcc.gnu.org/bugzilla/show_bug.cgi?id=34653):
+extern unsigned long table[];
+unsigned long foo(unsigned char *p) {
+ unsigned long tag = *p;
+ return table[tag >> 4] + table[tag & 0xf];
+}
+
+Current code generated:
+ movzbl (%rdi), %eax
+ movq %rax, %rcx
+ andq $240, %rcx
+ shrq %rcx
+ andq $15, %rax
+ movq table(,%rax,8), %rax
+ addq table(%rcx), %rax
+ ret
+
+Issues:
+1. First movq should be movl; saves a byte.
+2. Both andq's should be andl; saves another two bytes. I think this was
+ implemented at one point, but subsequently regressed.
+3. shrq should be shrl; saves another byte.
+4. The first andq can be completely eliminated by using a slightly more
+ expensive addressing mode.
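+
+Applying all four fixes by hand, the ideal sequence would be roughly
+the following (an untested sketch, not compiler output; since %eax is
+zero-extended from a byte, the tag >> 4 index needs no mask):
+ movzbl (%rdi), %eax
+ movl %eax, %ecx
+ shrl $4, %ecx
+ andl $15, %eax
+ movq table(,%rax,8), %rax
+ addq table(,%rcx,8), %rax
+ ret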
+
+//===---------------------------------------------------------------------===//