[llvm-commits] CVS: llvm/lib/Target/X86/README.txt

Chris Lattner lattner at cs.uiuc.edu
Fri Apr 21 14:03:33 PDT 2006



Changes in directory llvm/lib/Target/X86:

README.txt updated: 1.93 -> 1.94
---
Log message:

add some low-prio notes


---
Diffs of the changes:  (+69 -0)

 README.txt |   69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 69 insertions(+)


Index: llvm/lib/Target/X86/README.txt
diff -u llvm/lib/Target/X86/README.txt:1.93 llvm/lib/Target/X86/README.txt:1.94
--- llvm/lib/Target/X86/README.txt:1.93	Wed Apr 19 00:53:27 2006
+++ llvm/lib/Target/X86/README.txt	Fri Apr 21 16:03:21 2006
@@ -1054,3 +1054,72 @@
 
 //===---------------------------------------------------------------------===//
 
+Consider:
+
+__m128 test(float a) {
+  return _mm_set_ps(0.0, 0.0, 0.0, a*a);
+}
+
+This compiles into:
+
+movss 4(%esp), %xmm1
+mulss %xmm1, %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Because movss from memory zeros the top three elements and mulss leaves them
+untouched, xmm1's top elements are already zero.  We could compile this to:
+
+movss 4(%esp), %xmm0
+mulss %xmm0, %xmm0
+ret
+
+//===---------------------------------------------------------------------===//
+
+Here's a sick and twisted idea.  Consider code like this:
+
+__m128 test(__m128 a) {
+  float b = *(float*)&a;
+  ...
+  return _mm_set_ps(0.0, 0.0, 0.0, b);
+}
+
+This might compile to this code:
+
+movaps c(%esp), %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Now consider if the ... code caused xmm1 to get spilled.  This might produce
+this code:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+xorps %xmm0, %xmm0
+movaps c2(%esp), %xmm1
+movss %xmm1, %xmm0
+ret
+
+However, since the reload is only used by these instructions, we could 
+"fold" it into the uses, producing something like this:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+movss c2(%esp), %xmm0
+ret
+
+... saving two instructions.
+
+The basic idea is that a reload from a spill slot can, if only one 4-byte 
+chunk is used, bring in 3 zeros and the one element instead of 4 elements.
+This can be used to simplify a variety of shuffle operations, where the
+elements are fixed zeros.
+
+//===---------------------------------------------------------------------===//
+






More information about the llvm-commits mailing list