[llvm-commits] [llvm] r44656 - in /llvm/trunk: lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/README.txt test/CodeGen/X86/shift-combine.ll
Chris Lattner
sabre at nondot.org
Wed Dec 5 23:33:36 PST 2007
Author: lattner
Date: Thu Dec 6 01:33:36 2007
New Revision: 44656
URL: http://llvm.org/viewvc/llvm-project?rev=44656&view=rev
Log:
implement a readme entry, compiling the code into:
_foo:
movl $12, %eax
andl 4(%esp), %eax
movl _array(%eax), %eax
ret
instead of:
_foo:
movl 4(%esp), %eax
shrl $2, %eax
andl $3, %eax
movl _array(,%eax,4), %eax
ret
As it turns out, this triggers all the time, in a wide variety of
situations, for example, I see diffs like this in various programs:
- movl 8(%eax), %eax
- shll $2, %eax
- andl $1020, %eax
- movl (%esi,%eax), %eax
+ movzbl 8(%eax), %eax
+ movl (%esi,%eax,4), %eax
- shll $2, %edx
- andl $1020, %edx
- movl (%edi,%edx), %edx
+ andl $255, %edx
+ movl (%edi,%edx,4), %edx
Unfortunately, I also see stuff like this, which can be fixed in the
X86 backend:
- andl $85, %ebx
- addl _bit_count(,%ebx,4), %ebp
+ shll $2, %ebx
+ andl $340, %ebx
+ addl _bit_count(%ebx), %ebp
Added:
llvm/trunk/test/CodeGen/X86/shift-combine.ll
Modified:
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/README.txt
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=44656&r1=44655&r2=44656&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Thu Dec 6 01:33:36 2007
@@ -9,22 +9,6 @@
//
// This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run
// both before and after the DAG is legalized.
-//
-// FIXME: Missing folds
-// sdiv, udiv, srem, urem (X, const) where X is an integer can be expanded into
-// a sequence of multiplies, shifts, and adds. This should be controlled by
-// some kind of hint from the target that int div is expensive.
-// various folds of mulh[s,u] by constants such as -1, powers of 2, etc.
-//
-// FIXME: select C, pow2, pow2 -> something smart
-// FIXME: trunc(select X, Y, Z) -> select X, trunc(Y), trunc(Z)
-// FIXME: Dead stores -> nuke
-// FIXME: shr X, (and Y,31) -> shr X, Y (TRICKY!)
-// FIXME: mul (x, const) -> shifts + adds
-// FIXME: undef values
-// FIXME: divide by zero is currently left unfolded. do we want to turn this
-// into an undef?
-// FIXME: select ne (select cc, 1, 0), 0, true, false -> select cc, true, false
//
//===----------------------------------------------------------------------===//
@@ -280,6 +264,8 @@
SDOperand XformToShuffleWithZero(SDNode *N);
SDOperand ReassociateOps(unsigned Opc, SDOperand LHS, SDOperand RHS);
+ SDOperand visitShiftByConstant(SDNode *N, unsigned Amt);
+
bool SimplifySelectOps(SDNode *SELECT, SDOperand LHS, SDOperand RHS);
SDOperand SimplifyBinOpWithSameOpcodeHands(SDNode *N);
SDOperand SimplifySelect(SDOperand N0, SDOperand N1, SDOperand N2);
@@ -2145,6 +2131,64 @@
return SDOperand();
}
+/// visitShiftByConstant - Handle transforms common to the three shifts, when
+/// the shift amount is a constant.
+SDOperand DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) {
+ SDNode *LHS = N->getOperand(0).Val;
+ if (!LHS->hasOneUse()) return SDOperand();
+
+ // We want to pull some binops through shifts, so that we have (and (shift))
+ // instead of (shift (and)), likewise for add, or, xor, etc. This sort of
+ // thing happens with address calculations, so it's important to canonicalize
+ // it.
+ bool HighBitSet = false; // Can we transform this if the high bit is set?
+
+ switch (LHS->getOpcode()) {
+ default: return SDOperand();
+ case ISD::OR:
+ case ISD::XOR:
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ case ISD::AND:
+ HighBitSet = true; // We can only transform sra if the high bit is set.
+ break;
+ case ISD::ADD:
+ if (N->getOpcode() != ISD::SHL)
+ return SDOperand(); // only shl(add) not sr[al](add).
+ HighBitSet = false; // We can only transform sra if the high bit is clear.
+ break;
+ }
+
+ // We require the RHS of the binop to be a constant as well.
+ ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
+ if (!BinOpCst) return SDOperand();
+
+ MVT::ValueType VT = N->getValueType(0);
+
+ // If this is a signed shift right, and the high bit is modified
+ // by the logical operation, do not perform the transformation.
+ // The highBitSet boolean indicates the value of the high bit of
+ // the constant which would cause it to be modified for this
+ // operation.
+ if (N->getOpcode() == ISD::SRA) {
+ uint64_t BinOpRHSSign = BinOpCst->getValue() >> MVT::getSizeInBits(VT)-1;
+ if ((bool)BinOpRHSSign != HighBitSet)
+ return SDOperand();
+ }
+
+ // Fold the constants, shifting the binop RHS by the shift amount.
+ SDOperand NewRHS = DAG.getNode(N->getOpcode(), N->getValueType(0),
+ LHS->getOperand(1), N->getOperand(1));
+
+ // Create the new shift.
+ SDOperand NewShift = DAG.getNode(N->getOpcode(), VT, LHS->getOperand(0),
+ N->getOperand(1));
+
+ // Create the new binop.
+ return DAG.getNode(LHS->getOpcode(), VT, NewShift, NewRHS);
+}
+
+
SDOperand DAGCombiner::visitSHL(SDNode *N) {
SDOperand N0 = N->getOperand(0);
SDOperand N1 = N->getOperand(1);
@@ -2199,7 +2243,8 @@
if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1))
return DAG.getNode(ISD::AND, VT, N0.getOperand(0),
DAG.getConstant(~0ULL << N1C->getValue(), VT));
- return SDOperand();
+
+ return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand();
}
SDOperand DAGCombiner::visitSRA(SDNode *N) {
@@ -2259,7 +2304,8 @@
// If the sign bit is known to be zero, switch this to a SRL.
if (DAG.MaskedValueIsZero(N0, MVT::getIntVTSignBit(VT)))
return DAG.getNode(ISD::SRL, VT, N0, N1);
- return SDOperand();
+
+ return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand();
}
SDOperand DAGCombiner::visitSRL(SDNode *N) {
@@ -2353,7 +2399,7 @@
if (N1C && SimplifyDemandedBits(SDOperand(N, 0)))
return SDOperand(N, 0);
- return SDOperand();
+ return N1C ? visitShiftByConstant(N, N1C->getValue()) : SDOperand();
}
SDOperand DAGCombiner::visitCTLZ(SDNode *N) {
Modified: llvm/trunk/lib/Target/README.txt
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/README.txt?rev=44656&r1=44655&r2=44656&view=diff
==============================================================================
--- llvm/trunk/lib/Target/README.txt (original)
+++ llvm/trunk/lib/Target/README.txt Thu Dec 6 01:33:36 2007
@@ -464,41 +464,3 @@
}
//===---------------------------------------------------------------------===//
-on this code:
-
-unsigned array[4];
-unsigned foo(unsigned long x) {
- return array[(x>>2)&3ul];
-}
-
-we should dag combine the left+right shift together. We currently get:
-
-_foo:
- movl 4(%esp), %eax
- shrl $2, %eax
- andl $3, %eax
- movl _array(,%eax,4), %eax
- ret
-
-similar on ppc:
-
-_foo:
- lis r2, ha16(_array)
- srwi r3, r3, 2
- la r2, lo16(_array)(r2)
- rlwinm r3, r3, 2, 28, 29
- lwzx r3, r2, r3
- blr
-
-similar on arm:
-
-_foo:
- mov r3, #3
- and r3, r3, r0, lsr #2
- ldr r2, LCPI1_0
- ldr r0, [r2, +r3, lsl #2]
- bx lr
-
-this is a trivial dag combine xform.
-
-//===---------------------------------------------------------------------===//
Added: llvm/trunk/test/CodeGen/X86/shift-combine.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/shift-combine.ll?rev=44656&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/shift-combine.ll (added)
+++ llvm/trunk/test/CodeGen/X86/shift-combine.ll Thu Dec 6 01:33:36 2007
@@ -0,0 +1,15 @@
+; RUN: llvm-as < %s | llc | not grep shrl
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i686-apple-darwin8"
+ at array = weak global [4 x i32] zeroinitializer ; <[4 x i32]*> [#uses=1]
+
+define i32 @foo(i32 %x) {
+entry:
+ %tmp2 = lshr i32 %x, 2 ; <i32> [#uses=1]
+ %tmp3 = and i32 %tmp2, 3 ; <i32> [#uses=1]
+ %tmp4 = getelementptr [4 x i32]* @array, i32 0, i32 %tmp3 ; <i32*> [#uses=1]
+ %tmp5 = load i32* %tmp4, align 4 ; <i32> [#uses=1]
+ ret i32 %tmp5
+}
+
More information about the llvm-commits
mailing list