[llvm-commits] [llvm] r50079 - in /llvm/trunk: lib/Transforms/Scalar/InstructionCombining.cpp test/Transforms/InstCombine/getelementptr-seteq.ll
Chris Lattner
sabre at nondot.org
Mon Apr 21 19:53:33 PDT 2008
Author: lattner
Date: Mon Apr 21 21:53:33 2008
New Revision: 50079
URL: http://llvm.org/viewvc/llvm-project?rev=50079&view=rev
Log:
optimize "p != gep p, ..." better. This allows us to compile
getelementptr-seteq.ll into:
define i1 @test(i64 %X, %S* %P) {
%C = icmp eq i64 %X, -1 ; <i1> [#uses=1]
ret i1 %C
}
instead of:
define i1 @test(i64 %X, %S* %P) {
%A.idx.mask = and i64 %X, 4611686018427387903 ; <i64> [#uses=1]
%C = icmp eq i64 %A.idx.mask, 4611686018427387903 ; <i1> [#uses=1]
ret i1 %C
}
And fixes the second half of PR2235. This speeds up the insertion sort
case by 45%, from 1.12s to 0.77s. In practice, this will significantly
speed up for loops structured like:
for (double *P = Base + N; P != Base; --P)
...
Which happens frequently for C++ iterators.
Added:
llvm/trunk/test/Transforms/InstCombine/getelementptr-seteq.ll
Modified:
llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp
Modified: llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp?rev=50079&r1=50078&r2=50079&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/InstructionCombining.cpp Mon Apr 21 21:53:33 2008
@@ -4873,7 +4873,7 @@
Value *Result = Constant::getNullValue(IntPtrTy);
// Build a mask for high order bits.
- unsigned IntPtrWidth = TD.getPointerSize()*8;
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
@@ -4937,6 +4937,114 @@
return Result;
}
+
+/// EvaluateGEPOffsetExpression - Return an value that can be used to compare of
+/// the *offset* implied by GEP to zero. For example, if we have &A[i], we want
+/// to return 'i' for "icmp ne i, 0". Note that, in general, indices can be
+/// complex, and scales are involved. The above expression would also be legal
+/// to codegen as "icmp ne (i*4), 0" (assuming A is a pointer to i32). This
+/// later form is less amenable to optimization though, and we are allowed to
+/// generate the first by knowing that pointer arithmetic doesn't overflow.
+///
+/// If we can't emit an optimized form for this expression, this returns null.
+///
+static Value *EvaluateGEPOffsetExpression(User *GEP, Instruction &I,
+ InstCombiner &IC) {
+// return 0;
+ TargetData &TD = IC.getTargetData();
+ gep_type_iterator GTI = gep_type_begin(GEP);
+
+ // Check to see if this gep only has a single variable index. If so, and if
+ // any constant indices are a multiple of its scale, then we can compute this
+ // in terms of the scale of the variable index. For example, if the GEP
+ // implies an offset of "12 + i*4", then we can codegen this as "3 + i",
+ // because the expression will cross zero at the same point.
+ unsigned i, e = GEP->getNumOperands();
+ int64_t Offset = 0;
+ for (i = 1; i != e; ++i, ++GTI) {
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i))) {
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ } else {
+ // Found our variable index.
+ break;
+ }
+ }
+
+ // If there are no variable indices, we must have a constant offset, just
+ // evaluate it the general way.
+ if (i == e) return 0;
+
+ Value *VariableIdx = GEP->getOperand(i);
+ // Determine the scale factor of the variable element. For example, this is
+ // 4 if the variable index is into an array of i32.
+ uint64_t VariableScale = TD.getABITypeSize(GTI.getIndexedType());
+
+ // Verify that there are no other variable indices. If so, emit the hard way.
+ for (++i, ++GTI; i != e; ++i, ++GTI) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(GEP->getOperand(i));
+ if (!CI) return 0;
+
+ // Compute the aggregate offset of constant indices.
+ if (CI->isZero()) continue;
+
+ // Handle a struct index, which adds its field offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD.getStructLayout(STy)->getElementOffset(CI->getZExtValue());
+ } else {
+ uint64_t Size = TD.getABITypeSize(GTI.getIndexedType());
+ Offset += Size*CI->getSExtValue();
+ }
+ }
+
+ // Okay, we know we have a single variable index, which must be a
+ // pointer/array/vector index. If there is no offset, life is simple, return
+ // the index.
+ unsigned IntPtrWidth = TD.getPointerSizeInBits();
+ if (Offset == 0) {
+ // Cast to intptrty in case a truncation occurs. If an extension is needed,
+ // we don't need to bother extending: the extension won't affect where the
+ // computation crosses zero.
+ if (VariableIdx->getType()->getPrimitiveSizeInBits() > IntPtrWidth)
+ VariableIdx = new TruncInst(VariableIdx, TD.getIntPtrType(),
+ VariableIdx->getNameStart(), &I);
+ return VariableIdx;
+ }
+
+ // Otherwise, there is an index. The computation we will do will be modulo
+ // the pointer size, so get it.
+ uint64_t PtrSizeMask = ~0ULL >> (64-IntPtrWidth);
+
+ Offset &= PtrSizeMask;
+ VariableScale &= PtrSizeMask;
+
+ // To do this transformation, any constant index must be a multiple of the
+ // variable scale factor. For example, we can evaluate "12 + 4*i" as "3 + i",
+ // but we can't evaluate "10 + 3*i" in terms of i. Check that the offset is a
+ // multiple of the variable scale.
+ int64_t NewOffs = Offset / (int64_t)VariableScale;
+ if (Offset != NewOffs*(int64_t)VariableScale)
+ return 0;
+
+ // Okay, we can do this evaluation. Start by converting the index to intptr.
+ const Type *IntPtrTy = TD.getIntPtrType();
+ if (VariableIdx->getType() != IntPtrTy)
+ VariableIdx = CastInst::createIntegerCast(VariableIdx, IntPtrTy,
+ true /*SExt*/,
+ VariableIdx->getNameStart(), &I);
+ Constant *OffsetVal = ConstantInt::get(IntPtrTy, NewOffs);
+ return BinaryOperator::createAdd(VariableIdx, OffsetVal, "offset", &I);
+}
+
+
/// FoldGEPICmp - Fold comparisons between a GEP instruction and something
/// else. At this point we know that the GEP is on the LHS of the comparison.
Instruction *InstCombiner::FoldGEPICmp(User *GEPLHS, Value *RHS,
@@ -4944,15 +5052,20 @@
Instruction &I) {
assert(dyn_castGetElementPtr(GEPLHS) && "LHS is not a getelementptr!");
- if (CastInst *CI = dyn_cast<CastInst>(RHS))
- if (isa<PointerType>(CI->getOperand(0)->getType()))
- RHS = CI->getOperand(0);
+ // Look through bitcasts.
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(RHS))
+ RHS = BCI->getOperand(0);
Value *PtrBase = GEPLHS->getOperand(0);
if (PtrBase == RHS) {
// ((gep Ptr, OFFSET) cmp Ptr) ---> (OFFSET cmp 0).
- // This transformation is valid because we know pointers can't overflow.
- Value *Offset = EmitGEPOffset(GEPLHS, I, *this);
+ // This transformation (ignoring the base and scales) is valid because we
+ // know pointers can't overflow. See if we can output an optimized form.
+ Value *Offset = EvaluateGEPOffsetExpression(GEPLHS, I, *this);
+
+ // If not, synthesize the offset the hard way.
+ if (Offset == 0)
+ Offset = EmitGEPOffset(GEPLHS, I, *this);
return new ICmpInst(ICmpInst::getSignedPredicate(Cond), Offset,
Constant::getNullValue(Offset->getType()));
} else if (User *GEPRHS = dyn_castGetElementPtr(RHS)) {
Added: llvm/trunk/test/Transforms/InstCombine/getelementptr-seteq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/InstCombine/getelementptr-seteq.ll?rev=50079&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/InstCombine/getelementptr-seteq.ll (added)
+++ llvm/trunk/test/Transforms/InstCombine/getelementptr-seteq.ll Mon Apr 21 21:53:33 2008
@@ -0,0 +1,13 @@
+; Test folding of constantexpr geps into normal geps.
+; RUN: llvm-as < %s | opt -instcombine | llvm-dis | grep {icmp eq i64 %X, -1}
+; PR2235
+
+%S = type { i32, [ 100 x i32] }
+
+define i1 @test(i64 %X, %S* %P) {
+ %A = getelementptr %S* %P, i32 0, i32 1, i64 %X
+ %B = getelementptr %S* %P, i32 0, i32 0
+ %C = icmp eq i32* %A, %B
+ ret i1 %C
+}
+
More information about the llvm-commits
mailing list