[llvm-commits] [llvm] r72507 - in /llvm/trunk: include/llvm/Target/TargetLowering.h lib/CodeGen/SelectionDAG/DAGCombiner.cpp lib/Target/X86/X86ISelLowering.cpp lib/Target/X86/X86ISelLowering.h test/CodeGen/X86/narrow_op-1.ll test/CodeGen/X86/narrow_op-2.ll
Evan Cheng
evan.cheng at apple.com
Wed May 27 17:35:15 PDT 2009
Author: evancheng
Date: Wed May 27 19:35:15 2009
New Revision: 72507
URL: http://llvm.org/viewvc/llvm-project?rev=72507&view=rev
Log:
Added optimization that narrow load / op / store and the 'op' is a bit twiddling instruction and its second operand is an immediate. If bits that are touched by 'op' can be done with a narrower instruction, reduce the width of the load and store as well. This happens a lot with bitfield manipulation code.
e.g.
orl $65536, 8(%rax)
=>
orb $1, 10(%rax)
Since narrowing is not always a win, e.g. i32 -> i16 is a loss on x86, dag combiner consults with the target before performing the optimization.
Added:
llvm/trunk/test/CodeGen/X86/narrow_op-1.ll
llvm/trunk/test/CodeGen/X86/narrow_op-2.ll
Modified:
llvm/trunk/include/llvm/Target/TargetLowering.h
llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
llvm/trunk/lib/Target/X86/X86ISelLowering.h
Modified: llvm/trunk/include/llvm/Target/TargetLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Target/TargetLowering.h?rev=72507&r1=72506&r2=72507&view=diff
==============================================================================
--- llvm/trunk/include/llvm/Target/TargetLowering.h (original)
+++ llvm/trunk/include/llvm/Target/TargetLowering.h Wed May 27 19:35:15 2009
@@ -1420,6 +1420,13 @@
return false;
}
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const {
+ return true;
+ }
+
//===--------------------------------------------------------------------===//
// Div utility functions
//
Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=72507&r1=72506&r2=72507&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
+++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Wed May 27 19:35:15 2009
@@ -41,6 +41,7 @@
STATISTIC(NodesCombined , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
+STATISTIC(OpsNarrowed , "Number of load/op/store narrowed");
namespace {
static cl::opt<bool>
@@ -222,6 +223,7 @@
SDValue BuildUDIV(SDNode *N);
SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL);
SDValue ReduceLoadWidth(SDNode *N);
+ SDValue ReduceLoadOpStoreWidth(SDNode *N);
SDValue GetDemandedBits(SDValue V, const APInt &Mask);
@@ -4900,6 +4902,96 @@
return SDValue();
}
+
+/// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is
+/// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some
+/// of the loaded bits, try narrowing the load and store if it would end up
+/// being a win for performance or code size.
+SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
+ StoreSDNode *ST = cast<StoreSDNode>(N);
+ SDValue Chain = ST->getChain();
+ SDValue Value = ST->getValue();
+ SDValue Ptr = ST->getBasePtr();
+ MVT VT = Value.getValueType();
+
+ if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
+ return SDValue(0, 0);
+
+ unsigned Opc = Value.getOpcode();
+ if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
+ Value.getOperand(1).getOpcode() != ISD::Constant)
+ return SDValue(0, 0);
+
+ SDValue N0 = Value.getOperand(0);
+ if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse()) {
+ LoadSDNode *LD = cast<LoadSDNode>(N0);
+ if (LD->getBasePtr() != Ptr/* || Chain != N0.getValue(1)*/)
+ return SDValue(0, 0);
+
+ // Find the type to narrow it the load / op / store to.
+ SDValue N1 = Value.getOperand(1);
+ unsigned BitWidth = N1.getValueSizeInBits();
+ APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
+ if (Opc == ISD::AND)
+ Imm ^= APInt::getAllOnesValue(BitWidth);
+ unsigned ShAmt = Imm.countTrailingZeros();
+ unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
+ unsigned NewBW = NextPowerOf2(MSB - ShAmt);
+ MVT NewVT = MVT::getIntegerVT(NewBW);
+ while (NewBW < BitWidth &&
+ !(TLI.isTypeLegal(NewVT) &&
+ TLI.isOperationLegalOrCustom(Opc, NewVT) &&
+ TLI.isNarrowingProfitable(VT, NewVT))) {
+ NewBW = NextPowerOf2(NewBW);
+ NewVT = MVT::getIntegerVT(NewBW);
+ }
+ if (NewBW == BitWidth)
+ return SDValue(0, 0);
+
+ // If the lsb changed does not start at the type bitwidth boundary,
+ // start at the previous one.
+ if (ShAmt % NewBW)
+ ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
+ APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW);
+ if ((Imm & Mask) == Imm) {
+ APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
+ if (Opc == ISD::AND)
+ NewImm ^= APInt::getAllOnesValue(NewBW);
+ uint64_t PtrOff = ShAmt / 8;
+ // For big endian targets, we need to adjust the offset to the pointer to
+ // load the correct bytes.
+ if (TLI.isBigEndian())
+ PtrOff = (BitWidth - NewBW) / 8 - PtrOff;
+
+ unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
+ SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+ Ptr.getValueType(), Ptr,
+ DAG.getConstant(PtrOff, Ptr.getValueType()));
+ SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+ LD->getChain(), NewPtr,
+ LD->getSrcValue(), LD->getSrcValueOffset(),
+ LD->isVolatile(), NewAlign);
+ SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+ DAG.getConstant(NewImm, NewVT));
+ SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+ NewVal, NewPtr,
+ ST->getSrcValue(), ST->getSrcValueOffset(),
+ ST->isVolatile(), NewAlign);
+
+ AddToWorkList(NewPtr.getNode());
+ AddToWorkList(NewLD.getNode());
+ AddToWorkList(NewVal.getNode());
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1),
+ &DeadNodes);
+ ++OpsNarrowed;
+ return NewST;
+ }
+ }
+
+ return SDValue(0, 0);
+}
+
SDValue DAGCombiner::visitSTORE(SDNode *N) {
StoreSDNode *ST = cast<StoreSDNode>(N);
SDValue Chain = ST->getChain();
@@ -5086,7 +5178,7 @@
ST->isVolatile(), ST->getAlignment());
}
- return SDValue();
+ return ReduceLoadOpStoreWidth(N);
}
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=72507&r1=72506&r2=72507&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Wed May 27 19:35:15 2009
@@ -6877,6 +6877,11 @@
return VT1 == MVT::i32 && VT2 == MVT::i64 && Subtarget->is64Bit();
}
+bool X86TargetLowering::isNarrowingProfitable(MVT VT1, MVT VT2) const {
+ // i16 instructions are longer (0x66 prefix) and potentially slower.
+ return !(VT1 == MVT::i32 && VT2 == MVT::i16);
+}
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask values
Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.h?rev=72507&r1=72506&r2=72507&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.h (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.h Wed May 27 19:35:15 2009
@@ -466,6 +466,11 @@
virtual bool isZExtFree(const Type *Ty1, const Type *Ty2) const;
virtual bool isZExtFree(MVT VT1, MVT VT2) const;
+ /// isNarrowingProfitable - Return true if it's profitable to narrow
+ /// operations of type VT1 to VT2. e.g. on x86, it's profitable to narrow
+ /// from i32 to i8 but not from i32 to i16.
+ virtual bool isNarrowingProfitable(MVT VT1, MVT VT2) const;
+
/// isShuffleMaskLegal - Targets can use this to indicate that they only
/// support *some* VECTOR_SHUFFLE operations, those with specific masks.
/// By default, if a target supports the VECTOR_SHUFFLE node, all mask
Added: llvm/trunk/test/CodeGen/X86/narrow_op-1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/narrow_op-1.ll?rev=72507&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/narrow_op-1.ll (added)
+++ llvm/trunk/test/CodeGen/X86/narrow_op-1.ll Wed May 27 19:35:15 2009
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | grep orb | grep 1
+; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | count 1
+; RUN: llvm-as < %s | llc -march=x86-64 | grep orl | grep 16842752
+
+ %struct.bf = type { i64, i16, i16, i32 }
+ at bfi = common global %struct.bf zeroinitializer, align 16
+
+define void @t1() nounwind optsize ssp {
+entry:
+ %0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ %1 = or i32 %0, 65536
+ store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ ret void
+}
+
+define void @t2() nounwind optsize ssp {
+entry:
+ %0 = load i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ %1 = or i32 %0, 16842752
+ store i32 %1, i32* bitcast (i16* getelementptr (%struct.bf* @bfi, i32 0, i32 1) to i32*), align 8
+ ret void
+}
Added: llvm/trunk/test/CodeGen/X86/narrow_op-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/narrow_op-2.ll?rev=72507&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/narrow_op-2.ll (added)
+++ llvm/trunk/test/CodeGen/X86/narrow_op-2.ll Wed May 27 19:35:15 2009
@@ -0,0 +1,23 @@
+; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | count 2
+; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 254
+; RUN: llvm-as < %s | llc -march=x86-64 | grep andb | grep 253
+
+ %struct.bf = type { i64, i16, i16, i32 }
+ at bfi = external global %struct.bf*
+
+define void @t1() nounwind ssp {
+entry:
+ %0 = load %struct.bf** @bfi, align 8
+ %1 = getelementptr %struct.bf* %0, i64 0, i32 1
+ %2 = bitcast i16* %1 to i32*
+ %3 = load i32* %2, align 1
+ %4 = and i32 %3, -65537
+ store i32 %4, i32* %2, align 1
+ %5 = load %struct.bf** @bfi, align 8
+ %6 = getelementptr %struct.bf* %5, i64 0, i32 1
+ %7 = bitcast i16* %6 to i32*
+ %8 = load i32* %7, align 1
+ %9 = and i32 %8, -131073
+ store i32 %9, i32* %7, align 1
+ ret void
+}
More information about the llvm-commits
mailing list