r299445 - Preserve vec3 type.
Jin-Gu Kang via cfe-commits
cfe-commits at lists.llvm.org
Tue Apr 4 09:40:25 PDT 2017
Author: jaykang10
Date: Tue Apr 4 11:40:25 2017
New Revision: 299445
URL: http://llvm.org/viewvc/llvm-project?rev=299445&view=rev
Log:
Preserve vec3 type.
Summary: Preserve vec3 type with CodeGen option.
Reviewers: Anastasia, bruno
Reviewed By: Anastasia
Subscribers: bruno, ahatanak, cfe-commits
Differential Revision: https://reviews.llvm.org/D30810
Added:
cfe/trunk/test/CodeGenOpenCL/preserve_vec3.cl
Modified:
cfe/trunk/include/clang/Driver/CC1Options.td
cfe/trunk/include/clang/Frontend/CodeGenOptions.def
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/lib/CodeGen/CGExprScalar.cpp
cfe/trunk/lib/Frontend/CompilerInvocation.cpp
Modified: cfe/trunk/include/clang/Driver/CC1Options.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Driver/CC1Options.td?rev=299445&r1=299444&r2=299445&view=diff
==============================================================================
--- cfe/trunk/include/clang/Driver/CC1Options.td (original)
+++ cfe/trunk/include/clang/Driver/CC1Options.td Tue Apr 4 11:40:25 2017
@@ -658,6 +658,8 @@ def fdefault_calling_conv_EQ : Joined<["
HelpText<"Set default MS calling convention">;
def finclude_default_header : Flag<["-"], "finclude-default-header">,
HelpText<"Include the default header file for OpenCL">;
+def fpreserve_vec3_type : Flag<["-"], "fpreserve-vec3-type">,
+ HelpText<"Preserve 3-component vector type">;
// FIXME: Remove these entirely once functionality/tests have been excised.
def fobjc_gc_only : Flag<["-"], "fobjc-gc-only">, Group<f_Group>,
Modified: cfe/trunk/include/clang/Frontend/CodeGenOptions.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Frontend/CodeGenOptions.def?rev=299445&r1=299444&r2=299445&view=diff
==============================================================================
--- cfe/trunk/include/clang/Frontend/CodeGenOptions.def (original)
+++ cfe/trunk/include/clang/Frontend/CodeGenOptions.def Tue Apr 4 11:40:25 2017
@@ -263,6 +263,9 @@ CODEGENOPT(StrictReturn, 1, 1)
/// Whether emit extra debug info for sample pgo profile collection.
CODEGENOPT(DebugInfoForProfiling, 1, 0)
+/// Whether 3-component vector type is preserved.
+CODEGENOPT(PreserveVec3Type, 1, 0)
+
#undef CODEGENOPT
#undef ENUM_CODEGENOPT
#undef VALUE_CODEGENOPT
Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=299445&r1=299444&r2=299445&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Tue Apr 4 11:40:25 2017
@@ -1369,26 +1369,28 @@ llvm::Value *CodeGenFunction::EmitLoadOf
QualType TBAABaseType,
uint64_t TBAAOffset,
bool isNontemporal) {
- // For better performance, handle vector loads differently.
- if (Ty->isVectorType()) {
- const llvm::Type *EltTy = Addr.getElementType();
-
- const auto *VTy = cast<llvm::VectorType>(EltTy);
-
- // Handle vectors of size 3 like size 4 for better performance.
- if (VTy->getNumElements() == 3) {
-
- // Bitcast to vec4 type.
- llvm::VectorType *vec4Ty = llvm::VectorType::get(VTy->getElementType(),
- 4);
- Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
- // Now load value.
- llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
-
- // Shuffle vector to get vec3.
- V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
- {0, 1, 2}, "extractVec");
- return EmitFromMemory(V, Ty);
+ if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+ // For better performance, handle vector loads differently.
+ if (Ty->isVectorType()) {
+ const llvm::Type *EltTy = Addr.getElementType();
+
+ const auto *VTy = cast<llvm::VectorType>(EltTy);
+
+ // Handle vectors of size 3 like size 4 for better performance.
+ if (VTy->getNumElements() == 3) {
+
+ // Bitcast to vec4 type.
+ llvm::VectorType *vec4Ty =
+ llvm::VectorType::get(VTy->getElementType(), 4);
+ Address Cast = Builder.CreateElementBitCast(Addr, vec4Ty, "castToVec4");
+ // Now load value.
+ llvm::Value *V = Builder.CreateLoad(Cast, Volatile, "loadVec4");
+
+ // Shuffle vector to get vec3.
+ V = Builder.CreateShuffleVector(V, llvm::UndefValue::get(vec4Ty),
+ {0, 1, 2}, "extractVec");
+ return EmitFromMemory(V, Ty);
+ }
}
}
@@ -1456,24 +1458,25 @@ void CodeGenFunction::EmitStoreOfScalar(
uint64_t TBAAOffset,
bool isNontemporal) {
- // Handle vectors differently to get better performance.
- if (Ty->isVectorType()) {
- llvm::Type *SrcTy = Value->getType();
- auto *VecTy = cast<llvm::VectorType>(SrcTy);
- // Handle vec3 special.
- if (VecTy->getNumElements() == 3) {
- // Our source is a vec3, do a shuffle vector to make it a vec4.
- llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
- Builder.getInt32(2),
- llvm::UndefValue::get(Builder.getInt32Ty())};
- llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
- Value = Builder.CreateShuffleVector(Value,
- llvm::UndefValue::get(VecTy),
- MaskV, "extractVec");
- SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
- }
- if (Addr.getElementType() != SrcTy) {
- Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
+ if (!CGM.getCodeGenOpts().PreserveVec3Type) {
+ // Handle vectors differently to get better performance.
+ if (Ty->isVectorType()) {
+ llvm::Type *SrcTy = Value->getType();
+ auto *VecTy = cast<llvm::VectorType>(SrcTy);
+ // Handle vec3 special.
+ if (VecTy->getNumElements() == 3) {
+ // Our source is a vec3, do a shuffle vector to make it a vec4.
+ llvm::Constant *Mask[] = {Builder.getInt32(0), Builder.getInt32(1),
+ Builder.getInt32(2),
+ llvm::UndefValue::get(Builder.getInt32Ty())};
+ llvm::Value *MaskV = llvm::ConstantVector::get(Mask);
+ Value = Builder.CreateShuffleVector(Value, llvm::UndefValue::get(VecTy),
+ MaskV, "extractVec");
+ SrcTy = llvm::VectorType::get(VecTy->getElementType(), 4);
+ }
+ if (Addr.getElementType() != SrcTy) {
+ Addr = Builder.CreateElementBitCast(Addr, SrcTy, "storetmp");
+ }
}
}
Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=299445&r1=299444&r2=299445&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Tue Apr 4 11:40:25 2017
@@ -3593,8 +3593,12 @@ Value *ScalarExprEmitter::VisitAsTypeExp
// vector to get a vec4, then a bitcast if the target type is different.
if (NumElementsSrc == 3 && NumElementsDst != 3) {
Src = ConvertVec3AndVec4(Builder, CGF, Src, 4);
- Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
- DstTy);
+
+ if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
+ Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+ DstTy);
+ }
+
Src->setName("astype");
return Src;
}
@@ -3603,9 +3607,12 @@ Value *ScalarExprEmitter::VisitAsTypeExp
// to vec4 if the original type is not vec4, then a shuffle vector to
// get a vec3.
if (NumElementsSrc != 3 && NumElementsDst == 3) {
- auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
- Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
- Vec4Ty);
+ if (!CGF.CGM.getCodeGenOpts().PreserveVec3Type) {
+ auto Vec4Ty = llvm::VectorType::get(DstTy->getVectorElementType(), 4);
+ Src = createCastsForTypeOfSameSize(Builder, CGF.CGM.getDataLayout(), Src,
+ Vec4Ty);
+ }
+
Src = ConvertVec3AndVec4(Builder, CGF, Src, 3);
Src->setName("astype");
return Src;
Modified: cfe/trunk/lib/Frontend/CompilerInvocation.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Frontend/CompilerInvocation.cpp?rev=299445&r1=299444&r2=299445&view=diff
==============================================================================
--- cfe/trunk/lib/Frontend/CompilerInvocation.cpp (original)
+++ cfe/trunk/lib/Frontend/CompilerInvocation.cpp Tue Apr 4 11:40:25 2017
@@ -729,6 +729,7 @@ static bool ParseCodeGenArgs(CodeGenOpti
}
}
+ Opts.PreserveVec3Type = Args.hasArg(OPT_fpreserve_vec3_type);
Opts.InstrumentFunctions = Args.hasArg(OPT_finstrument_functions);
Opts.XRayInstrumentFunctions = Args.hasArg(OPT_fxray_instrument);
Opts.XRayInstructionThreshold =
Added: cfe/trunk/test/CodeGenOpenCL/preserve_vec3.cl
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGenOpenCL/preserve_vec3.cl?rev=299445&view=auto
==============================================================================
--- cfe/trunk/test/CodeGenOpenCL/preserve_vec3.cl (added)
+++ cfe/trunk/test/CodeGenOpenCL/preserve_vec3.cl Tue Apr 4 11:40:25 2017
@@ -0,0 +1,24 @@
+// RUN: %clang_cc1 %s -emit-llvm -o - -triple spir-unknown-unknown -fpreserve-vec3-type | FileCheck %s
+
+typedef float float3 __attribute__((ext_vector_type(3)));
+typedef float float4 __attribute__((ext_vector_type(4)));
+
+void kernel foo(global float3 *a, global float3 *b) {
+ // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a
+ // CHECK: store <3 x float> %[[LOAD_A]], <3 x float> addrspace(1)* %b
+ *b = *a;
+}
+
+void kernel float4_to_float3(global float3 *a, global float4 *b) {
+ // CHECK: %[[LOAD_A:.*]] = load <4 x float>, <4 x float> addrspace(1)* %b, align 16
+ // CHECK: %[[ASTYPE:.*]] = shufflevector <4 x float> %[[LOAD_A]], <4 x float> undef, <3 x i32> <i32 0, i32 1, i32 2>
+ // CHECK: store <3 x float> %[[ASTYPE:.*]], <3 x float> addrspace(1)* %a, align 16
+ *a = __builtin_astype(*b, float3);
+}
+
+void kernel float3_to_float4(global float3 *a, global float4 *b) {
+ // CHECK: %[[LOAD_A:.*]] = load <3 x float>, <3 x float> addrspace(1)* %a, align 16
+ // CHECK: %[[ASTYPE:.*]] = shufflevector <3 x float> %[[LOAD_A]], <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+ // CHECK: store <4 x float> %[[ASTYPE:.*]], <4 x float> addrspace(1)* %b, align 16
+ *b = __builtin_astype(*a, float4);
+}
More information about the cfe-commits
mailing list