[llvm-commits] [llvm] r126126 - /llvm/trunk/lib/Target/X86/README.txt
Chris Lattner
sabre at nondot.org
Mon Feb 21 09:03:47 PST 2011
Author: lattner
Date: Mon Feb 21 11:03:47 2011
New Revision: 126126
URL: http://llvm.org/viewvc/llvm-project?rev=126126&view=rev
Log:
a serious "compare CSE" issue that is nontrivial to get right,
but which is responsible for us doing really bad things to 256.bzip2.
Modified:
llvm/trunk/lib/Target/X86/README.txt
Modified: llvm/trunk/lib/Target/X86/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/README.txt?rev=126126&r1=126125&r2=126126&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/README.txt (original)
+++ llvm/trunk/lib/Target/X86/README.txt Mon Feb 21 11:03:47 2011
@@ -1878,3 +1878,72 @@
ret
//===---------------------------------------------------------------------===//
+
+The hot loop of 256.bzip2 contains code that looks a bit like this:
+
+int foo(char *P, char *Q, int x, int y) {
+ if (P[0] != Q[0])
+ return P[0] < Q[0];
+ if (P[1] != Q[1])
+ return P[1] < Q[1];
+ if (P[2] != Q[2])
+ return P[2] < Q[2];
+ return P[3] < Q[3];
+}
+
+In the real code, we get a lot more wrong than this. However, even in this
+code we generate:
+
+_foo: ## @foo
+## BB#0: ## %entry
+ movb (%rsi), %al
+ movb (%rdi), %cl
+ cmpb %al, %cl
+ je LBB0_2
+LBB0_1: ## %if.then
+ cmpb %al, %cl
+ jmp LBB0_5
+LBB0_2: ## %if.end
+ movb 1(%rsi), %al
+ movb 1(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#3: ## %if.end38
+ movb 2(%rsi), %al
+ movb 2(%rdi), %cl
+ cmpb %al, %cl
+ jne LBB0_1
+## BB#4: ## %if.end60
+ movb 3(%rdi), %al
+ cmpb 3(%rsi), %al
+LBB0_5: ## %if.end60
+ setl %al
+ movzbl %al, %eax
+ ret
+
+Note that we generate jumps to LBB0_1 which does a redundant compare. The
+redundant compare also forces the register values to be live, which prevents
+folding one of the loads into the compare. In contrast, GCC 4.2 produces:
+
+_foo:
+ movzbl (%rsi), %eax
+ cmpb %al, (%rdi)
+ jne L10
+L12:
+ movzbl 1(%rsi), %eax
+ cmpb %al, 1(%rdi)
+ jne L10
+ movzbl 2(%rsi), %eax
+ cmpb %al, 2(%rdi)
+ jne L10
+ movzbl 3(%rdi), %eax
+ cmpb 3(%rsi), %al
+L10:
+ setl %al
+ movzbl %al, %eax
+ ret
+
+which is "perfect".
+
+//===---------------------------------------------------------------------===//
+
More information about the llvm-commits
mailing list