[cfe-commits] r162002 - in /cfe/trunk: lib/CodeGen/CGExpr.cpp test/CodeGen/alignment.c test/CodeGenOpenCL/vectorLoadStore.cl
Tanya Lattner
tonic at nondot.org
Wed Aug 15 17:10:13 PDT 2012
Author: tbrethou
Date: Wed Aug 15 19:10:13 2012
New Revision: 162002
URL: http://llvm.org/viewvc/llvm-project?rev=162002&view=rev
Log:
Convert loads and stores of vec3 to vec4 to achieve better code generation. Add test case.
Added:
cfe/trunk/test/CodeGenOpenCL/vectorLoadStore.cl
Modified:
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/test/CodeGen/alignment.c
Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=162002&r1=162001&r2=162002&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Wed Aug 15 19:10:13 2012
@@ -938,6 +938,50 @@
llvm::Value *CodeGenFunction::EmitLoadOfScalar(llvm::Value *Addr, bool Volatile,
unsigned Alignment, QualType Ty,
llvm::MDNode *TBAAInfo) {
+
+ // For better performance, handle vector loads differently.
+ if (Ty->isVectorType()) {
+ llvm::Value *V;
+ const llvm::Type *EltTy =
+ cast<llvm::PointerType>(Addr->getType())->getElementType();
+
+ const llvm::VectorType *VTy = cast<llvm::VectorType>(EltTy);
+
+ // Handle vectors of size 3 like vectors of size 4 for better performance.
+ if (VTy->getNumElements() == 3) {
+
+ // Bitcast to vec4 type.
+ llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
+ 4);
+ llvm::PointerType *ptVec4Ty =
+ llvm::PointerType::get(vec4Ty,
+ (cast<llvm::PointerType>(
+ Addr->getType()))->getAddressSpace());
+ llvm::Value *Cast = Builder.CreateBitCast(Addr, ptVec4Ty,
+ "castToVec4");
+ // Now load value.
+ llvm::Value *LoadVal = Builder.CreateLoad(Cast, Volatile, "loadVec4");
+
+ // Shuffle vector to get vec3.
+ llvm::SmallVector<llvm::Constant*, 3> Mask;
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 0));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 1));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(getLLVMContext()),
+ 2));
+
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ V = Builder.CreateShuffleVector(LoadVal,
+ llvm::UndefValue::get(vec4Ty),
+ MaskV, "extractVec");
+ return EmitFromMemory(V, Ty);
+ }
+ }
+
llvm::LoadInst *Load = Builder.CreateLoad(Addr);
if (Volatile)
Load->setVolatile(true);
@@ -984,6 +1028,42 @@
QualType Ty,
llvm::MDNode *TBAAInfo,
bool isInit) {
+
+ // Handle vectors differently to get better performance.
+ if (Ty->isVectorType()) {
+ llvm::Type *SrcTy = Value->getType();
+ llvm::VectorType *VecTy = cast<llvm::VectorType>(SrcTy);
+ // Handle vec3 specially.
+ if (VecTy->getNumElements() == 3) {
+ llvm::LLVMContext &VMContext = getLLVMContext();
+
+ // Our source is a vec3; do a shuffle vector to make it a vec4.
+ llvm::SmallVector<llvm::Constant*, 4> Mask;
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 0));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 1));
+ Mask.push_back(llvm::ConstantInt::get(
+ llvm::Type::getInt32Ty(VMContext),
+ 2));
+ Mask.push_back(llvm::UndefValue::get(llvm::Type::getInt32Ty(VMContext)));
+
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ Value = Builder.CreateShuffleVector(Value,
+ llvm::UndefValue::get(VecTy),
+ MaskV, "extractVec");
+ SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+ }
+ llvm::PointerType *DstPtr = cast<llvm::PointerType>(Addr->getType());
+ if (DstPtr->getElementType() != SrcTy) {
+ llvm::Type *MemTy =
+ llvm::PointerType::get(SrcTy, DstPtr->getAddressSpace());
+ Addr = Builder.CreateBitCast(Addr, MemTy, "storetmp");
+ }
+ }
+
Value = EmitToMemory(Value, Ty);
llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
Modified: cfe/trunk/test/CodeGen/alignment.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/alignment.c?rev=162002&r1=162001&r2=162002&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/alignment.c (original)
+++ cfe/trunk/test/CodeGen/alignment.c Wed Aug 15 19:10:13 2012
@@ -43,7 +43,8 @@
*p = (packedfloat3) { 3.2f, 2.3f, 0.1f };
}
// CHECK: @test3(
-// CHECK: store <3 x float> {{.*}}, align 4
+// CHECK: bitcast <3 x float>* %.compoundliteral to <4 x float>*
+// CHECK: store <4 x float> {{.*}}, align 4
// CHECK: ret void
Added: cfe/trunk/test/CodeGenOpenCL/vectorLoadStore.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/vectorLoadStore.cl?rev=162002&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/vectorLoadStore.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/vectorLoadStore.cl Wed Aug 15 19:10:13 2012
@@ -0,0 +1,9 @@
+// RUN: %clang_cc1 %s -emit-llvm -O0 -o - | FileCheck %s
+
+typedef char char3 __attribute((ext_vector_type(3)));;
+
+// Check for optimized vec3 load/store which treats vec3 as vec4.
+void foo(char3 *P, char3 *Q) {
+ *P = *Q;
+ // CHECK: %extractVec = shufflevector <4 x i8> %loadVec4, <4 x i8> undef, <3 x i32> <i32 0, i32 1, i32 2>
+}
More information about the cfe-commits
mailing list