[llvm-commits] [llvm] r147604 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp test/CodeGen/X86/shift-folding.ll
Chandler Carruth
chandlerc at gmail.com
Thu Jan 5 03:05:55 PST 2012
Author: chandlerc
Date: Thu Jan 5 05:05:55 2012
New Revision: 147604
URL: http://llvm.org/viewvc/llvm-project?rev=147604&view=rev
Log:
Prevent a DAGCombine from firing where there are two uses of
a combined-away node and the result of the combine isn't substantially
smaller than the input; it's just canonicalized. This is the first part
of a significant (7%) performance gain for Snappy's hot decompression
loop.
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/test/CodeGen/X86/shift-folding.ll
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=147604&r1=147603&r2=147604&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Jan 5 05:05:55 2012
@@ -3331,7 +3331,9 @@
// fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
// (and (srl x, (sub c1, c2), MASK)
- if (N1C && N0.getOpcode() == ISD::SRL &&
+ // Only fold this if the inner shift has no other uses -- if it does, folding
+ // this will increase the total number of instructions.
+ if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse() &&
N0.getOperand(1).getOpcode() == ISD::Constant) {
uint64_t c1 = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
if (c1 < VT.getSizeInBits()) {
Modified: llvm/trunk/test/CodeGen/X86/shift-folding.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-folding.ll?rev=147604&r1=147603&r2=147604&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-folding.ll (original)
+++ llvm/trunk/test/CodeGen/X86/shift-folding.ll Thu Jan 5 05:05:55 2012
@@ -48,3 +48,23 @@
%tmp512 = lshr i32 %tmp4, 24
ret i32 %tmp512
}
+
+define i64 @test5(i16 %i, i32* %arr) {
+; Ensure that we don't fold away shifts which have multiple uses, as they are
+; just re-introduced for the second use.
+; CHECK: test5:
+; CHECK-NOT: shrl
+; CHECK: shrl $11
+; CHECK-NOT: shrl
+; CHECK: ret
+
+entry:
+ %i.zext = zext i16 %i to i32
+ %index = lshr i32 %i.zext, 11
+ %index.zext = zext i32 %index to i64
+ %val.ptr = getelementptr inbounds i32* %arr, i64 %index.zext
+ %val = load i32* %val.ptr
+ %val.zext = zext i32 %val to i64
+ %sum = add i64 %val.zext, %index.zext
+ ret i64 %sum
+}
More information about the llvm-commits
mailing list