[llvm-commits] CVS: llvm/lib/Target/X86/README.txt
Chris Lattner
lattner at cs.uiuc.edu
Fri Apr 21 14:03:33 PDT 2006
Changes in directory llvm/lib/Target/X86:
README.txt updated: 1.93 -> 1.94
---
Log message:
add some low-prio notes
---
Diffs of the changes: (+69 -0)
README.txt | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 69 insertions(+)
Index: llvm/lib/Target/X86/README.txt
diff -u llvm/lib/Target/X86/README.txt:1.93 llvm/lib/Target/X86/README.txt:1.94
--- llvm/lib/Target/X86/README.txt:1.93 Wed Apr 19 00:53:27 2006
+++ llvm/lib/Target/X86/README.txt Fri Apr 21 16:03:21 2006
@@ -1054,3 +1054,72 @@
//===---------------------------------------------------------------------===//
+Consider:
+
+__m128 test(float a) {
+ return _mm_set_ps(0.0, 0.0, 0.0, a*a);
+}
+
+This compiles into:
+
+movss 4(%esp), %xmm1
+mulss %xmm1, %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Because mulss multiplies 0*0 = 0.0, the top elements of xmm1 are already zerod.
+We could compile this to:
+
+movss 4(%esp), %xmm0
+mulss %xmm0, %xmm0
+ret
+
+//===---------------------------------------------------------------------===//
+
+Here's a sick and twisted idea. Consider code like this:
+
+__m128 test(__m128 a) {
+ float b = *(float*)&A;
+ ...
+ return _mm_set_ps(0.0, 0.0, 0.0, b);
+}
+
+This might compile to this code:
+
+movaps c(%esp), %xmm1
+xorps %xmm0, %xmm0
+movss %xmm1, %xmm0
+ret
+
+Now consider if the ... code caused xmm1 to get spilled. This might produce
+this code:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+xorps %xmm0, %xmm0
+movaps c2(%esp), %xmm1
+movss %xmm1, %xmm0
+ret
+
+However, since the reload is only used by these instructions, we could
+"fold" it into the uses, producing something like this:
+
+movaps c(%esp), %xmm1
+movaps %xmm1, c2(%esp)
+...
+
+movss c2(%esp), %xmm0
+ret
+
+... saving two instructions.
+
+The basic idea is that a reload from a spill slot, can, if only one 4-byte
+chunk is used, bring in 3 zeros the the one element instead of 4 elements.
+This can be used to simplify a variety of shuffle operations, where the
+elements are fixed zeros.
+
+//===---------------------------------------------------------------------===//
+
More information about the llvm-commits
mailing list