[cfe-commits] r106973 - /cfe/trunk/lib/CodeGen/CGCall.cpp
Chris Lattner
sabre at nondot.org
Sat Jun 26 22:56:15 PDT 2010
Author: lattner
Date: Sun Jun 27 00:56:15 2010
New Revision: 106973
URL: http://llvm.org/viewvc/llvm-project?rev=106973&view=rev
Log:
improve CreateCoercedLoad a bit to generate slightly less awful
IR when handling X86-64 by-value struct stuff. For example, we
used to compile this:
struct DeclGroup {
unsigned NumDecls;
};
int foo(DeclGroup D);
void bar(DeclGroup *D) {
foo(*D);
}
into:
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp nounwind {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%0 = bitcast i64* %tmp3 to %struct.DeclGroup* ; <%struct.DeclGroup*> [#uses=1]
%1 = load %struct.DeclGroup* %agg.tmp ; <%struct.DeclGroup> [#uses=1]
store %struct.DeclGroup %1, %struct.DeclGroup* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
call void @_Z3foo9DeclGroup(i64 %2)
ret void
}
which would cause fastisel to bail out due to the first-class aggregate load %1. With
this patch we now compile it into the (still awful):
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) nounwind ssp noredzone {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
%tmp3 = alloca i64 ; <i64*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = bitcast i64* %tmp3 to i32* ; <i32*> [#uses=1]
%1 = load i32* %coerce.dive ; <i32> [#uses=1]
store i32 %1, i32* %0, align 1
%2 = load i64* %tmp3 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %2) noredzone ; <i32> [#uses=0]
ret void
}
which doesn't bail out. On CGStmt.ll, this reduces fastisel bail outs from 958 to 935,
and is the precursor of better things to come.
Modified:
cfe/trunk/lib/CodeGen/CGCall.cpp
Modified: cfe/trunk/lib/CodeGen/CGCall.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGCall.cpp?rev=106973&r1=106972&r2=106973&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGCall.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGCall.cpp Sun Jun 27 00:56:15 2010
@@ -345,6 +345,41 @@
}
}
+/// EnterStructPointerForCoercedLoad - Given SrcPtr, a pointer to a struct of
+/// type SrcSTy from which DstSize bytes will be read, GEP into its first element
+/// (recursively) and return the innermost pointer reached. Stops before entering
+/// a first element that is smaller than both DstSize and the enclosing struct.
+static llvm::Value *
+EnterStructPointerForCoercedLoad(llvm::Value *SrcPtr,
+ const llvm::StructType *SrcSTy,
+ uint64_t DstSize, CodeGenFunction &CGF) {
+ // An empty struct has no first element to enter; return the pointer unchanged.
+ if (SrcSTy->getNumElements() == 0) return SrcPtr;
+
+ const llvm::Type *FirstElt = SrcSTy->getElementType(0);
+
+ // Enter the first element only if its alloc size covers DstSize bytes or spans
+ // the whole struct; otherwise the access needs bytes beyond it, so stop here.
+ uint64_t FirstEltSize =
+ CGF.CGM.getTargetData().getTypeAllocSize(FirstElt);
+ if (FirstEltSize < DstSize &&
+ FirstEltSize < CGF.CGM.getTargetData().getTypeAllocSize(SrcSTy))
+ return SrcPtr;
+
+ // GEP to element 0 of the struct (indices 0, 0), passing "coerce.dive" as name.
+ SrcPtr = CGF.Builder.CreateConstGEP2_32(SrcPtr, 0, 0, "coerce.dive");
+
+ // If that element is itself a struct, keep diving (inner SrcSTy shadows the parameter).
+ const llvm::Type *SrcTy =
+ cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
+ if (const llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy))
+ return EnterStructPointerForCoercedLoad(SrcPtr, SrcSTy, DstSize, CGF);
+
+ return SrcPtr;
+}
+
+
+
/// CreateCoercedLoad - Create a load from \arg SrcPtr interpreted as
/// a pointer to an object of type \arg Ty.
///
@@ -356,8 +391,14 @@
CodeGenFunction &CGF) {
const llvm::Type *SrcTy =
cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
- uint64_t SrcSize = CGF.CGM.getTargetData().getTypeAllocSize(SrcTy);
uint64_t DstSize = CGF.CGM.getTargetData().getTypeAllocSize(Ty);
+
+ if (const llvm::StructType *SrcSTy = dyn_cast<llvm::StructType>(SrcTy)) {
+ SrcPtr = EnterStructPointerForCoercedLoad(SrcPtr, SrcSTy, DstSize, CGF);
+ SrcTy = cast<llvm::PointerType>(SrcPtr->getType())->getElementType();
+ }
+
+ uint64_t SrcSize = CGF.CGM.getTargetData().getTypeAllocSize(SrcTy);
// If load is legal, just bitcast the src pointer.
if (SrcSize >= DstSize) {
More information about the cfe-commits
mailing list