[llvm-commits] [llvm] r82439 - in /llvm/trunk: lib/Transforms/Scalar/GVN.cpp test/Transforms/GVN/rle.ll
Chris Lattner
sabre at nondot.org
Sun Sep 20 22:57:11 PDT 2009
Author: lattner
Date: Mon Sep 21 00:57:11 2009
New Revision: 82439
URL: http://llvm.org/viewvc/llvm-project?rev=82439&view=rev
Log:
Improve GVN to be able to forward substitute a small load
from a piece of a large store when both are in the same block.
This allows clang to compile the testcase in PR4216 to the following
code (an IR-level sketch of the transformation appears after the
assembly listings):
_test_bitfield:
movl 4(%esp), %eax
movl %eax, %ecx
andl $-65536, %ecx
orl $32962, %eax
andl $40186, %eax
orl %ecx, %eax
ret
This is not ideal, but is a whole lot better than the code produced
by llvm-gcc:
_test_bitfield:
movw $-32574, %ax
orw 4(%esp), %ax
andw $-25350, %ax
movw %ax, 4(%esp)
movw 7(%esp), %cx
shlw $8, %cx
movzbl 6(%esp), %edx
orw %cx, %dx
movzwl %dx, %ecx
shll $16, %ecx
movzwl %ax, %eax
orl %ecx, %eax
ret
and dramatically better than that produced by gcc 4.2:
_test_bitfield:
pushl %ebx
call L3
"L00000000001$pb":
L3:
popl %ebx
movl 8(%esp), %eax
leal 0(,%eax,4), %edx
sarb $7, %dl
movl %eax, %ecx
andl $7168, %ecx
andl $-7201, %ebx
movzbl %dl, %edx
andl $1, %edx
sall $5, %edx
orl %ecx, %ebx
orl %edx, %ebx
andl $24, %eax
andl $-58336, %ebx
orl %eax, %ebx
orl $32962, %ebx
movl %ebx, %eax
popl %ebx
ret
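At the IR level, the new transformation looks roughly like the following
sketch (little-endian byte layout and illustrative value names assumed;
this mirrors the new coerce_offset0 test added below).  Before GVN, the
load rereads byte 2 of the i32 that was just stored:

  define i8 @example(i32 %V, i32* %P) {
    store i32 %V, i32* %P
    %P2 = bitcast i32* %P to i8*
    %P3 = getelementptr i8* %P2, i32 2
    %A = load i8* %P3
    ret i8 %A
  }

After GVN, the store stays but the loaded byte is extracted from the
stored value with a shift and truncate, and the load goes away:

  define i8 @example(i32 %V, i32* %P) {
    store i32 %V, i32* %P
    %tmp = lshr i32 %V, 16    ; byte offset 2 == bit offset 16 (LE)
    %A = trunc i32 %tmp to i8
    ret i8 %A
  }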
Modified:
llvm/trunk/lib/Transforms/Scalar/GVN.cpp
llvm/trunk/test/Transforms/GVN/rle.ll
Modified: llvm/trunk/lib/Transforms/Scalar/GVN.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/GVN.cpp?rev=82439&r1=82438&r2=82439&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/GVN.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/GVN.cpp Mon Sep 21 00:57:11 2009
@@ -23,6 +23,7 @@
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
+#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
@@ -38,6 +39,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -1341,6 +1343,153 @@
return true;
}
+/// GetBaseWithConstantOffset - Analyze the specified pointer to see if it can
+/// be expressed as a base pointer plus a constant offset.  Return the base;
+/// the byte offset is accumulated into the caller-initialized Offset argument.
+static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
+ const TargetData *TD) {
+ Operator *PtrOp = dyn_cast<Operator>(Ptr);
+ if (PtrOp == 0) return Ptr;
+
+ // Just look through bitcasts.
+ if (PtrOp->getOpcode() == Instruction::BitCast)
+ return GetBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD);
+
+ // If this is a GEP with constant indices, we can look through it.
+ GEPOperator *GEP = dyn_cast<GEPOperator>(PtrOp);
+ if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr;
+
+ gep_type_iterator GTI = gep_type_begin(GEP);
+ for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E;
+ ++I, ++GTI) {
+ ConstantInt *OpC = cast<ConstantInt>(*I);
+ if (OpC->isZero()) continue;
+
+ // Handle struct and array indices, which add their offset to the pointer.
+ if (const StructType *STy = dyn_cast<StructType>(*GTI)) {
+ Offset += TD->getStructLayout(STy)->getElementOffset(OpC->getZExtValue());
+ } else {
+ uint64_t Size = TD->getTypeAllocSize(GTI.getIndexedType());
+ Offset += OpC->getSExtValue()*Size;
+ }
+ }
+
+ // Re-sign extend from the pointer size if needed to get overflow edge cases
+ // right.
+ unsigned PtrSize = TD->getPointerSizeInBits();
+ if (PtrSize < 64)
+ Offset = (Offset << (64-PtrSize)) >> (64-PtrSize);
+
+ return GetBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD);
+}
+
+
+/// HandleLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store. This means
+/// that the store *may* provide bits used by the load but we can't be sure
+/// because the pointers don't mustalias. Check this case to see if there is
+/// anything more we can do before we give up.
+static Value *HandleLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
+ const TargetData *TD) {
+ int64_t StoreOffset = 0, LoadOffset = 0;
+ Value *StoreBase =
+ GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
+ Value *LoadBase =
+ GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
+ if (StoreBase != LoadBase)
+ return 0;
+
+ // If the load and store are to the exact same address, they should have been
+ // a must alias. AA must have gotten confused.
+ // FIXME: Study to see if/when this happens.
+ if (LoadOffset == StoreOffset) {
+#if 0
+ errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *DepSI->getPointerOperand() << "\n"
+ << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
+ << "Load Ptr = " << *L->getPointerOperand() << "\n"
+ << "Load Offs = " << LoadOffset << " - " << *L << "\n\n";
+ errs() << "'" << L->getParent()->getParent()->getName() << "'"
+ << *L->getParent();
+#endif
+ return 0;
+ }
+
+ // If the load and store don't overlap at all, the store doesn't provide
+ // anything to the load. In this case, they really don't alias at all, AA
+ // must have gotten confused.
+ // FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
+ // remove this check, as it is duplicated with what we have below.
+ uint64_t StoreSize = TD->getTypeSizeInBits(DepSI->getOperand(0)->getType());
+ uint64_t LoadSize = TD->getTypeSizeInBits(L->getType());
+
+ if ((StoreSize & 7) | (LoadSize & 7))
+ return 0;
+ StoreSize >>= 3; // Convert to bytes.
+ LoadSize >>= 3;
+
+
+ bool isAAFailure = false;
+ if (StoreOffset < LoadOffset) {
+ isAAFailure = StoreOffset+int64_t(StoreSize) <= LoadOffset;
+ } else {
+ isAAFailure = LoadOffset+int64_t(LoadSize) <= StoreOffset;
+ }
+ if (isAAFailure) {
+#if 0
+ errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
+ << "Base = " << *StoreBase << "\n"
+ << "Store Ptr = " << *DepSI->getPointerOperand() << "\n"
+ << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
+ << "Load Ptr = " << *L->getPointerOperand() << "\n"
+ << "Load Offs = " << LoadOffset << " - " << *L << "\n\n";
+ errs() << "'" << L->getParent()->getParent()->getName() << "'"
+ << *L->getParent();
+#endif
+ return 0;
+ }
+
+ // If the Load isn't completely contained within the stored bits, we don't
+ // have all the bits to feed it. We could do something crazy in the future
+ // (issue a smaller load, then merge the bits in) but this seems unlikely to be
+ // valuable.
+ if (StoreOffset > LoadOffset ||
+ StoreOffset+StoreSize < LoadOffset+LoadSize)
+ return 0;
+
+ // Adjust LoadOffset to be relative from the start of StoreOffset.
+ LoadOffset -= StoreOffset;
+
+ Value *SrcVal = DepSI->getOperand(0);
+
+ LLVMContext &Ctx = SrcVal->getType()->getContext();
+
+ // Compute which bits of the stored value are being used by the load. Convert
+ // to an integer type to start with.
+ if (isa<PointerType>(SrcVal->getType()))
+ SrcVal = new PtrToIntInst(SrcVal, TD->getIntPtrType(Ctx), "tmp", L);
+ if (!isa<IntegerType>(SrcVal->getType()))
+ SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8), "", L);
+
+ // Shift the bits to the least significant position, depending on endianness.
+ // Sizes and offsets are in bytes here, so scale the shift amount to bits.
+ unsigned ShiftAmt;
+ if (TD->isLittleEndian())
+ ShiftAmt = LoadOffset*8;
+ else
+ ShiftAmt = (StoreSize-LoadSize-LoadOffset)*8;
+
+ SrcVal = BinaryOperator::CreateLShr(SrcVal,
+ ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", L);
+
+ SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8), "", L);
+
+ return CoerceAvailableValueToLoadType(SrcVal, L->getType(), L, *TD);
+}
+
+
+
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
@@ -1352,7 +1501,12 @@
// If the value isn't available, don't do anything!
if (Dep.isClobber()) {
- // FIXME: In the future, we should handle things like:
+ // FIXME: We should handle memset/memcpy/memmove as dependent instructions
+ // to forward the value if available.
+ //if (isa<MemIntrinsic>(Dep.getInst()))
+ //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n";
+
+ // Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
// %B = gep i8* %A, i32 1
@@ -1362,6 +1516,21 @@
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
+ if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
+ if (const TargetData *TD = getAnalysisIfAvailable<TargetData>())
+ if (Value *AvailVal = HandleLoadFromClobberingStore(L, DepSI, TD)) {
+ DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
+ << *AvailVal << '\n' << *L << "\n\n\n");
+
+ // Replace the load!
+ L->replaceAllUsesWith(AvailVal);
+ if (isa<PointerType>(AvailVal->getType()))
+ MD->invalidateCachedPointerInfo(AvailVal);
+ toErase.push_back(L);
+ NumGVNLoad++;
+ return true;
+ }
+
DEBUG(
// fast print dep, using operator<< on instruction would be too slow
errs() << "GVN: load ";
@@ -1429,12 +1598,6 @@
return true;
}
- // FIXME: We should handle memset/memcpy/memmove as dependent instructions to
- // forward the value if available.
- //if (isa<MemIntrinsic>(DepInst))
- // errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *DepInst << "\n\n";
-
-
// If this load really doesn't depend on anything, then we must be loading an
// undef value. This can happen when loading for a fresh allocation with no
// intervening stores, for example.
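To make the new pointer analysis concrete: GetBaseWithConstantOffset
walks through bitcasts and all-constant-index GEPs, accumulating a byte
offset from a common base.  For the pattern in the comment above (plus a
hypothetical %C added here for illustration), assuming a TargetData
where i32 has a 4-byte alloc size, the decompositions are:

  %A = bitcast i32* %P to i8*         ; base %P, offset 0 (cast skipped)
  %B = getelementptr i8* %A, i32 1    ; base %P, offset 1 (1 x i8)
  %C = getelementptr i32* %P, i32 3   ; base %P, offset 12 (3 x 4 bytes)

Struct indices contribute their field offset from StructLayout instead
of an index*size product.  Once a store and a load resolve to the same
base, HandleLoadFromClobberingStore only needs to compare the two byte
offsets and sizes to decide whether the stored bits fully cover the load.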
Modified: llvm/trunk/test/Transforms/GVN/rle.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/GVN/rle.ll?rev=82439&r1=82438&r2=82439&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/GVN/rle.ll (original)
+++ llvm/trunk/test/Transforms/GVN/rle.ll Mon Sep 21 00:57:11 2009
@@ -141,6 +141,35 @@
; CHECK: ret i8 %A
}
+;; non-local i32/float -> i8 load forwarding. This also tests that the "P3"
+;; bitcast equivalence can be properly phi translated.
+define i8 @coerce_mustalias_nonlocal1(i32* %P, i1 %cond) {
+ %P2 = bitcast i32* %P to float*
+ br i1 %cond, label %T, label %F
+T:
+ store i32 42, i32* %P
+ br label %Cont
+
+F:
+ store float 1.0, float* %P2
+ br label %Cont
+
+Cont:
+ %P3 = bitcast i32* %P to i8*
+ %A = load i8* %P3
+ ret i8 %A
+
+;; FIXME: This check is disabled because the transformation caused a
+;; miscompile in the llvm-gcc bootstrap; see r82411.
+;
+; HECK: @coerce_mustalias_nonlocal1
+; HECK: Cont:
+; HECK: %A = phi i8 [
+; HECK-NOT: load
+; HECK: ret i8 %A
+}
+
+
;; non-local i32 -> i8 partial redundancy load forwarding.
define i8 @coerce_mustalias_pre0(i32* %P, i1 %cond) {
%P3 = bitcast i32* %P to i8*
@@ -165,3 +194,24 @@
; CHECK: ret i8 %A
}
+;;===----------------------------------------------------------------------===;;
+;; Store -> Load and Load -> Load forwarding where src and dst are different
+;; types, and the reload is an offset from the store pointer.
+;;===----------------------------------------------------------------------===;;
+
+;; i32 -> i8 forwarding from an offset within the store.
+define i8 @coerce_offset0(i32 %V, i32* %P) {
+ store i32 %V, i32* %P
+
+ %P2 = bitcast i32* %P to i8*
+ %P3 = getelementptr i8* %P2, i32 2
+
+ %A = load i8* %P3
+ ret i8 %A
+; CHECK: @coerce_offset0
+; CHECK-NOT: load
+; CHECK: ret i8
+}
+
+
+
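For completeness: on a big-endian target the coerce_offset0 example
above would be forwarded with a shift of (StoreSize-LoadSize-LoadOffset)*8
= (4-1-2)*8 = 8 bits rather than 16, since byte 2 of a big-endian i32
occupies bits 15..8.  A sketch of the forwarded value in that case:

  %tmp = lshr i32 %V, 8
  %A = trunc i32 %tmp to i8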