[llvm] r174746 - DAGCombiner: Constant folding around pre-increment loads/stores
Hal Finkel
hfinkel at anl.gov
Fri Feb 8 13:56:34 PST 2013
Renato,
Since you offered (perhaps that was dangerous)...
I can't test the post-decrement case here with PowerPC, but I think it can be tested with ARM (I think that only ARM and Hexagon have post-decrement in tree). Can you see if you can make an ARM test for this as well?
Thanks again,
Hal
----- Original Message -----
> From: "Hal Finkel" <hfinkel at anl.gov>
> To: llvm-commits at cs.uiuc.edu
> Sent: Friday, February 8, 2013 3:35:47 PM
> Subject: [llvm] r174746 - DAGCombiner: Constant folding around pre-increment loads/stores
>
> Author: hfinkel
> Date: Fri Feb 8 15:35:47 2013
> New Revision: 174746
>
> URL: http://llvm.org/viewvc/llvm-project?rev=174746&view=rev
> Log:
> DAGCombiner: Constant folding around pre-increment loads/stores
>
> Previously, even when a pre-increment load or store was generated,
> we often needed to keep a copy of the original base register for use
> with other offsets. If all of these offsets are constants (including
> the offset which was combined into the addressing mode), then this is
> clearly unnecessary. This change adjusts these other offsets to use the
> new incremented address.
>
> Added:
> llvm/trunk/test/CodeGen/PowerPC/stdux-constuse.ll
> Modified:
> llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
>
> Modified: llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp?rev=174746&r1=174745&r2=174746&view=diff
> ==============================================================================
> --- llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (original)
> +++ llvm/trunk/lib/CodeGen/SelectionDAG/DAGCombiner.cpp Fri Feb 8
> 15:35:47 2013
> @@ -6917,6 +6917,16 @@ bool DAGCombiner::CombineToPreIndexedLoa
> ISD::MemIndexedMode AM = ISD::UNINDEXED;
> if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
> return false;
> +
> + // Backends without true r+i pre-indexed forms may need to pass a
> + // constant base with a variable offset so that constant coercion
> + // will work with the patterns in canonical form.
> + bool Swapped = false;
> + if (isa<ConstantSDNode>(BasePtr)) {
> + std::swap(BasePtr, Offset);
> + Swapped = true;
> + }
> +
> // Don't create a indexed load / store with zero offset.
> if (isa<ConstantSDNode>(Offset) &&
> cast<ConstantSDNode>(Offset)->isNullValue())
> @@ -6942,6 +6952,48 @@ bool DAGCombiner::CombineToPreIndexedLoa
> return false;
> }
>
> + // If the offset is a constant, there may be other adds of constants that
> + // can be folded with this one. We should do this to avoid having to keep
> + // a copy of the original base pointer.
> + SmallVector<SDNode *, 16> OtherUses;
> + if (isa<ConstantSDNode>(Offset))
> + for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(),
> + E = BasePtr.getNode()->use_end(); I != E; ++I) {
> + SDNode *Use = *I;
> + if (Use == Ptr.getNode())
> + continue;
> +
> + if (Use->isPredecessorOf(N))
> + continue;
> +
> + if (Use->getOpcode() != ISD::ADD && Use->getOpcode() !=
> ISD::SUB) {
> + OtherUses.clear();
> + break;
> + }
> +
> + SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
> + if (Op1.getNode() == BasePtr.getNode())
> + std::swap(Op0, Op1);
> + assert(Op0.getNode() == BasePtr.getNode() &&
> + "Use of ADD/SUB but not an operand");
> +
> + if (!isa<ConstantSDNode>(Op1)) {
> + OtherUses.clear();
> + break;
> + }
> +
> + // FIXME: In some cases, we can be smarter about this.
> + if (Op1.getValueType() != Offset.getValueType()) {
> + OtherUses.clear();
> + break;
> + }
> +
> + OtherUses.push_back(Use);
> + }
> +
> + if (Swapped)
> + std::swap(BasePtr, Offset);
> +
> // Now check for #3 and #4.
> bool RealUse = false;
>
> @@ -6991,6 +7043,43 @@ bool DAGCombiner::CombineToPreIndexedLoa
> // Finally, since the node is now dead, remove it from the graph.
> DAG.DeleteNode(N);
>
> + if (Swapped)
> + std::swap(BasePtr, Offset);
> +
> + // Replace other uses of BasePtr that can be updated to use Ptr
> + for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
> + unsigned OffsetIdx = 1;
> + if (OtherUses[i]->getOperand(OffsetIdx).getNode() ==
> BasePtr.getNode())
> + OffsetIdx = 0;
> + assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
> + BasePtr.getNode() && "Expected BasePtr operand");
> +
> + APInt OV =
> + cast<ConstantSDNode>(Offset)->getAPIntValue();
> + if (AM == ISD::PRE_DEC)
> + OV = -OV;
> +
> + ConstantSDNode *CN =
> + cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
> + APInt CNV = CN->getAPIntValue();
> + if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1)
> + CNV += OV;
> + else
> + CNV -= OV;
> +
> + SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0);
> + SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0));
> + if (OffsetIdx == 0)
> + std::swap(NewOp1, NewOp2);
> +
> + SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(),
> + OtherUses[i]->getDebugLoc(),
> + OtherUses[i]->getValueType(0),
> NewOp1, NewOp2);
> + DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
> + removeFromWorkList(OtherUses[i]);
> + DAG.DeleteNode(OtherUses[i]);
> + }
> +
> // Replace the uses of Ptr with uses of the updated base value.
> DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 :
> 0));
> removeFromWorkList(Ptr.getNode());
>
> Added: llvm/trunk/test/CodeGen/PowerPC/stdux-constuse.ll
> URL:
> http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/PowerPC/stdux-constuse.ll?rev=174746&view=auto
> ==============================================================================
> --- llvm/trunk/test/CodeGen/PowerPC/stdux-constuse.ll (added)
> +++ llvm/trunk/test/CodeGen/PowerPC/stdux-constuse.ll Fri Feb 8
> 15:35:47 2013
> @@ -0,0 +1,47 @@
> +; RUN: llc -mcpu=a2 -disable-lsr < %s | FileCheck %s
> +target datalayout =
> "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
> +target triple = "powerpc64-unknown-linux-gnu"
> +
> +define i32 @test1(i64 %add, i64* %ptr) nounwind {
> +entry:
> + %p1 = getelementptr i64* %ptr, i64 144115188075855
> + br label %for.cond2.preheader
> +
> +for.cond2.preheader:
> + %nl.018 = phi i32 [ 0, %entry ], [ %inc9, %for.end ]
> + br label %for.body4
> +
> +for.body4:
> + %lsr.iv = phi i32 [ %lsr.iv.next, %for.body4 ], [ 16000,
> %for.cond2.preheader ]
> + %i0 = phi i64* [ %p1, %for.cond2.preheader ], [ %i6, %for.body4 ]
> + %i6 = getelementptr i64* %i0, i64 400000
> + %i7 = getelementptr i64* %i6, i64 300000
> + %i8 = getelementptr i64* %i6, i64 200000
> + %i9 = getelementptr i64* %i6, i64 100000
> + store i64 %add, i64* %i6, align 32
> + store i64 %add, i64* %i7, align 32
> + store i64 %add, i64* %i8, align 32
> + store i64 %add, i64* %i9, align 32
> + %lsr.iv.next = add i32 %lsr.iv, -16
> + %exitcond.15 = icmp eq i32 %lsr.iv.next, 0
> + br i1 %exitcond.15, label %for.end, label %for.body4
> +
> +; Make sure that we generate the most compact form of this loop with no
> +; unnecessary moves
> +; CHECK: @test1
> +; CHECK: mtctr
> +; CHECK: stdux
> +; CHECK-NEXT: stdx
> +; CHECK-NEXT: stdx
> +; CHECK-NEXT: stdx
> +; CHECK-NEXT: bdnz
> +
> +for.end:
> + %inc9 = add nsw i32 %nl.018, 1
> + %exitcond = icmp eq i32 %inc9, 400000
> + br i1 %exitcond, label %for.end10, label %for.cond2.preheader
> +
> +for.end10:
> + ret i32 0
> +}
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>
More information about the llvm-commits
mailing list