r247104 - Introduce __builtin_nontemporal_store and __builtin_nontemporal_load.
Michael Zolotukhin via cfe-commits
cfe-commits at lists.llvm.org
Tue Sep 8 16:52:34 PDT 2015
Author: mzolotukhin
Date: Tue Sep 8 18:52:33 2015
New Revision: 247104
URL: http://llvm.org/viewvc/llvm-project?rev=247104&view=rev
Log:
Introduce __builtin_nontemporal_store and __builtin_nontemporal_load.
Summary:
Currently clang provides no general way to generate nontemporal loads/stores.
There are some architecture-specific builtins for doing so (e.g. on x86), but
there is no way to generate a nontemporal store on, e.g., AArch64. This patch
adds generic builtins which are expanded to a simple load or store with
'!nontemporal' metadata attached in IR.
Differential Revision: http://reviews.llvm.org/D12313
Added:
cfe/trunk/test/CodeGen/Nontemporal.cpp
Modified:
cfe/trunk/include/clang/Basic/Builtins.def
cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
cfe/trunk/include/clang/Sema/Sema.h
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/CodeGen/CGExpr.cpp
cfe/trunk/lib/CodeGen/CGValue.h
cfe/trunk/lib/CodeGen/CodeGenFunction.h
cfe/trunk/lib/Sema/SemaChecking.cpp
Modified: cfe/trunk/include/clang/Basic/Builtins.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=247104&r1=247103&r2=247104&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/Builtins.def (original)
+++ cfe/trunk/include/clang/Basic/Builtins.def Tue Sep 8 18:52:33 2015
@@ -1245,6 +1245,10 @@ BUILTIN(__builtin_operator_delete, "vv*"
BUILTIN(__builtin___get_unsafe_stack_start, "v*", "Fn")
BUILTIN(__builtin___get_unsafe_stack_ptr, "v*", "Fn")
+// Nontemporal load/store builtins
+BUILTIN(__builtin_nontemporal_store, "v.", "t")
+BUILTIN(__builtin_nontemporal_load, "v.", "t")
+
#undef BUILTIN
#undef LIBBUILTIN
#undef LANGBUILTIN
Modified: cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td?rev=247104&r1=247103&r2=247104&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td (original)
+++ cfe/trunk/include/clang/Basic/DiagnosticSemaKinds.td Tue Sep 8 18:52:33 2015
@@ -6200,6 +6200,12 @@ def err_atomic_load_store_uses_lib : Err
"atomic %select{load|store}0 requires runtime support that is not "
"available for this target">;
+def err_nontemporal_builtin_must_be_pointer : Error<
+ "address argument to nontemporal builtin must be a pointer (%0 invalid)">;
+def err_nontemporal_builtin_must_be_pointer_intfltptr_or_vector : Error<
+ "address argument to nontemporal builtin must be a pointer to integer, float, "
+ "pointer, or a vector of such types (%0 invalid)">;
+
def err_deleted_function_use : Error<"attempt to use a deleted function">;
def err_kern_type_not_void_return : Error<
Modified: cfe/trunk/include/clang/Sema/Sema.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Sema/Sema.h?rev=247104&r1=247103&r2=247104&view=diff
==============================================================================
--- cfe/trunk/include/clang/Sema/Sema.h (original)
+++ cfe/trunk/include/clang/Sema/Sema.h Tue Sep 8 18:52:33 2015
@@ -8851,6 +8851,7 @@ private:
bool SemaBuiltinLongjmp(CallExpr *TheCall);
bool SemaBuiltinSetjmp(CallExpr *TheCall);
ExprResult SemaBuiltinAtomicOverloaded(ExprResult TheCallResult);
+ ExprResult SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult);
ExprResult SemaAtomicOpsOverloaded(ExprResult TheCallResult,
AtomicExpr::AtomicOp Op);
bool SemaBuiltinConstantArg(CallExpr *TheCall, int ArgNum,
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=247104&r1=247103&r2=247104&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Sep 8 18:52:33 2015
@@ -111,6 +111,28 @@ static Value *MakeBinaryAtomicValue(Code
return EmitFromInt(CGF, Result, T, ValueType);
}
+/// Emit the store for a __builtin_nontemporal_store call. Argument 0 of the
+/// call is the value to store and argument 1 is the destination address. The
+/// store is emitted through an LValue flagged as nontemporal. Always returns
+/// nullptr.
+static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
+ Value *Val = CGF.EmitScalarExpr(E->getArg(0));
+ Value *Address = CGF.EmitScalarExpr(E->getArg(1));
+
+ // Convert the type of the pointer to a pointer to the stored type.
+ Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
+ Value *BC = CGF.Builder.CreateBitCast(
+ Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
+ // The lvalue is typed after the value argument (arg 0), not after the
+ // pointer argument's pointee type.
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
+ LV.setNontemporal(true);
+ // NOTE(review): the trailing 'false' is presumably the isInit flag of the
+ // LValue overload of EmitStoreOfScalar -- confirm against CodeGenFunction.h.
+ CGF.EmitStoreOfScalar(Val, LV, false);
+ return nullptr;
+}
+
+/// Emit the load for a __builtin_nontemporal_load call. Argument 0 of the call
+/// is the source address; the type of the call expression itself is used as
+/// the type of the loaded lvalue. Returns the loaded scalar value.
+static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
+ Value *Address = CGF.EmitScalarExpr(E->getArg(0));
+
+ LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
+ // Flag the lvalue so the load emitted below is a nontemporal one.
+ LV.setNontemporal(true);
+ return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
+}
+
static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
llvm::AtomicRMWInst::BinOp Kind,
const CallExpr *E) {
@@ -1143,6 +1165,10 @@ RValue CodeGenFunction::EmitBuiltinExpr(
return RValue::get(nullptr);
}
+ case Builtin::BI__builtin_nontemporal_load:
+ return RValue::get(EmitNontemporalLoad(*this, E));
+ case Builtin::BI__builtin_nontemporal_store:
+ return RValue::get(EmitNontemporalStore(*this, E));
case Builtin::BI__c11_atomic_is_lock_free:
case Builtin::BI__atomic_is_lock_free: {
// Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
Modified: cfe/trunk/lib/CodeGen/CGExpr.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExpr.cpp?rev=247104&r1=247103&r2=247104&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGExpr.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGExpr.cpp Tue Sep 8 18:52:33 2015
@@ -1160,7 +1160,8 @@ llvm::Value *CodeGenFunction::EmitLoadOf
return EmitLoadOfScalar(lvalue.getAddress(), lvalue.isVolatile(),
lvalue.getType(), Loc, lvalue.getAlignmentSource(),
lvalue.getTBAAInfo(),
- lvalue.getTBAABaseType(), lvalue.getTBAAOffset());
+ lvalue.getTBAABaseType(), lvalue.getTBAAOffset(),
+ lvalue.isNontemporal());
}
static bool hasBooleanRepresentation(QualType Ty) {
@@ -1226,7 +1227,8 @@ llvm::Value *CodeGenFunction::EmitLoadOf
AlignmentSource AlignSource,
llvm::MDNode *TBAAInfo,
QualType TBAABaseType,
- uint64_t TBAAOffset) {
+ uint64_t TBAAOffset,
+ bool isNontemporal) {
// For better performance, handle vector loads differently.
if (Ty->isVectorType()) {
const llvm::Type *EltTy = Addr.getElementType();
@@ -1258,6 +1260,11 @@ llvm::Value *CodeGenFunction::EmitLoadOf
}
llvm::LoadInst *Load = Builder.CreateLoad(Addr, Volatile);
+ if (isNontemporal) {
+ llvm::MDNode *Node = llvm::MDNode::get(
+ Load->getContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+ Load->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+ }
if (TBAAInfo) {
llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
TBAAOffset);
@@ -1330,7 +1337,8 @@ void CodeGenFunction::EmitStoreOfScalar(
AlignmentSource AlignSource,
llvm::MDNode *TBAAInfo,
bool isInit, QualType TBAABaseType,
- uint64_t TBAAOffset) {
+ uint64_t TBAAOffset,
+ bool isNontemporal) {
// Handle vectors differently to get better performance.
if (Ty->isVectorType()) {
@@ -1365,6 +1373,12 @@ void CodeGenFunction::EmitStoreOfScalar(
}
llvm::StoreInst *Store = Builder.CreateStore(Value, Addr, Volatile);
+ if (isNontemporal) {
+ llvm::MDNode *Node =
+ llvm::MDNode::get(Store->getContext(),
+ llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
+ Store->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
+ }
if (TBAAInfo) {
llvm::MDNode *TBAAPath = CGM.getTBAAStructTagInfo(TBAABaseType, TBAAInfo,
TBAAOffset);
@@ -1378,7 +1392,7 @@ void CodeGenFunction::EmitStoreOfScalar(
EmitStoreOfScalar(value, lvalue.getAddress(), lvalue.isVolatile(),
lvalue.getType(), lvalue.getAlignmentSource(),
lvalue.getTBAAInfo(), isInit, lvalue.getTBAABaseType(),
- lvalue.getTBAAOffset());
+ lvalue.getTBAAOffset(), lvalue.isNontemporal());
}
/// EmitLoadOfLValue - Given an expression that represents a value lvalue, this
Modified: cfe/trunk/lib/CodeGen/CGValue.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGValue.h?rev=247104&r1=247103&r2=247104&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGValue.h (original)
+++ cfe/trunk/lib/CodeGen/CGValue.h Tue Sep 8 18:52:33 2015
@@ -202,6 +202,10 @@ class LValue {
unsigned AlignSource : 2;
+ // This flag shows whether nontemporal loads/stores should be used when
+ // accessing this lvalue.
+ bool Nontemporal : 1;
+
Expr *BaseIvarExp;
/// Used by struct-path-aware TBAA.
@@ -228,6 +232,7 @@ private:
// Initialize Objective-C flags.
this->Ivar = this->ObjIsArray = this->NonGC = this->GlobalObjCRef = false;
this->ImpreciseLifetime = false;
+ this->Nontemporal = false;
this->ThreadLocalRef = false;
this->BaseIvarExp = nullptr;
@@ -277,6 +282,8 @@ public:
void setARCPreciseLifetime(ARCPreciseLifetime_t value) {
ImpreciseLifetime = (value == ARCImpreciseLifetime);
}
+ bool isNontemporal() const { return Nontemporal; }
+ void setNontemporal(bool Value) { Nontemporal = Value; }
bool isObjCWeak() const {
return Quals.getObjCGCAttr() == Qualifiers::Weak;
Modified: cfe/trunk/lib/CodeGen/CodeGenFunction.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenFunction.h?rev=247104&r1=247103&r2=247104&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CodeGenFunction.h (original)
+++ cfe/trunk/lib/CodeGen/CodeGenFunction.h Tue Sep 8 18:52:33 2015
@@ -2449,7 +2449,8 @@ public:
AlignmentSource::Type,
llvm::MDNode *TBAAInfo = nullptr,
QualType TBAABaseTy = QualType(),
- uint64_t TBAAOffset = 0);
+ uint64_t TBAAOffset = 0,
+ bool isNontemporal = false);
/// EmitLoadOfScalar - Load a scalar value from an address, taking
/// care to appropriately convert from the memory representation to
@@ -2465,7 +2466,7 @@ public:
AlignmentSource AlignSource = AlignmentSource::Type,
llvm::MDNode *TBAAInfo = nullptr, bool isInit = false,
QualType TBAABaseTy = QualType(),
- uint64_t TBAAOffset = 0);
+ uint64_t TBAAOffset = 0, bool isNontemporal = false);
/// EmitStoreOfScalar - Store a scalar value to an address, taking
/// care to appropriately convert from the memory representation to
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=247104&r1=247103&r2=247104&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Tue Sep 8 18:52:33 2015
@@ -441,6 +441,9 @@ Sema::CheckBuiltinFunctionCall(FunctionD
case Builtin::BI__sync_swap_8:
case Builtin::BI__sync_swap_16:
return SemaBuiltinAtomicOverloaded(TheCallResult);
+ case Builtin::BI__builtin_nontemporal_load:
+ case Builtin::BI__builtin_nontemporal_store:
+ return SemaBuiltinNontemporalOverloaded(TheCallResult);
#define BUILTIN(ID, TYPE, ATTRS)
#define ATOMIC_BUILTIN(ID, TYPE, ATTRS) \
case Builtin::BI##ID: \
@@ -2210,6 +2213,78 @@ Sema::SemaBuiltinAtomicOverloaded(ExprRe
return TheCallResult;
}
+/// SemaBuiltinNontemporalOverloaded - We have a call to
+/// __builtin_nontemporal_store or __builtin_nontemporal_load, which is an
+/// overloaded function based on the pointer type of its last argument.
+///
+/// This function goes through and does final semantic checking for these
+/// builtins. On success the call's type is set to the unqualified pointee type
+/// (load) or to void (store) and the incoming result is returned; on any
+/// failure ExprError() is returned.
+ExprResult Sema::SemaBuiltinNontemporalOverloaded(ExprResult TheCallResult) {
+ CallExpr *TheCall = (CallExpr *)TheCallResult.get();
+ DeclRefExpr *DRE =
+ cast<DeclRefExpr>(TheCall->getCallee()->IgnoreParenCasts());
+ FunctionDecl *FDecl = cast<FunctionDecl>(DRE->getDecl());
+ unsigned BuiltinID = FDecl->getBuiltinID();
+ assert((BuiltinID == Builtin::BI__builtin_nontemporal_store ||
+ BuiltinID == Builtin::BI__builtin_nontemporal_load) &&
+ "Unexpected nontemporal load/store builtin!");
+ bool isStore = BuiltinID == Builtin::BI__builtin_nontemporal_store;
+ // A store takes (value, pointer); a load takes only (pointer).
+ unsigned numArgs = isStore ? 2 : 1;
+
+ // Ensure that we have the proper number of arguments.
+ if (checkArgCount(*this, TheCall, numArgs))
+ return ExprError();
+
+ // Inspect the last argument of the nontemporal builtin. This should always
+ // be a pointer type, from which we infer the type of the memory access.
+ // Because it is a pointer type, we don't have to worry about any implicit
+ // casts here.
+ Expr *PointerArg = TheCall->getArg(numArgs - 1);
+ ExprResult PointerArgResult =
+ DefaultFunctionArrayLvalueConversion(PointerArg);
+
+ if (PointerArgResult.isInvalid())
+ return ExprError();
+ PointerArg = PointerArgResult.get();
+ // Store the converted expression back into the call.
+ TheCall->setArg(numArgs - 1, PointerArg);
+
+ // Reject an address argument that is not a pointer.
+ const PointerType *pointerType = PointerArg->getType()->getAs<PointerType>();
+ if (!pointerType) {
+ Diag(DRE->getLocStart(), diag::err_nontemporal_builtin_must_be_pointer)
+ << PointerArg->getType() << PointerArg->getSourceRange();
+ return ExprError();
+ }
+
+ QualType ValType = pointerType->getPointeeType();
+
+ // Strip any qualifiers off ValType.
+ ValType = ValType.getUnqualifiedType();
+ // Only integer, pointer (including block pointer), floating-point and
+ // vector pointee types are accepted.
+ if (!ValType->isIntegerType() && !ValType->isAnyPointerType() &&
+ !ValType->isBlockPointerType() && !ValType->isFloatingType() &&
+ !ValType->isVectorType()) {
+ Diag(DRE->getLocStart(),
+ diag::err_nontemporal_builtin_must_be_pointer_intfltptr_or_vector)
+ << PointerArg->getType() << PointerArg->getSourceRange();
+ return ExprError();
+ }
+
+ // A load has no value operand: its result type is the pointee type.
+ if (!isStore) {
+ TheCall->setType(ValType);
+ return TheCallResult;
+ }
+
+ // For a store, convert the value operand (argument 0) to the pointee type by
+ // copy-initializing a parameter of that type, then give the call type void.
+ ExprResult ValArg = TheCall->getArg(0);
+ InitializedEntity Entity = InitializedEntity::InitializeParameter(
+ Context, ValType, /*consume*/ false);
+ ValArg = PerformCopyInitialization(Entity, SourceLocation(), ValArg);
+ if (ValArg.isInvalid())
+ return ExprError();
+
+ TheCall->setArg(0, ValArg.get());
+ TheCall->setType(Context.VoidTy);
+ return TheCallResult;
+}
+
/// CheckObjCString - Checks that the argument to the builtin
/// CFString constructor is correct
/// Note: It might also make sense to do the UTF-16 conversion here (would
Added: cfe/trunk/test/CodeGen/Nontemporal.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/Nontemporal.cpp?rev=247104&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/Nontemporal.cpp (added)
+++ cfe/trunk/test/CodeGen/Nontemporal.cpp Tue Sep 8 18:52:33 2015
@@ -0,0 +1,48 @@
+// Test frontend handling of nontemporal builtins.
+// RUN: %clang_cc1 -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s
+
+signed char sc;
+unsigned char uc;
+signed short ss;
+unsigned short us;
+signed int si;
+unsigned int ui;
+signed long long sll;
+unsigned long long ull;
+float f1, f2;
+double d1, d2;
+float __attribute__((vector_size(16))) vf1, vf2;
+char __attribute__((vector_size(8))) vc1, vc2;
+bool b1, b2;
+
+void test_all_sizes(void) // CHECK-LABEL: test_all_sizes
+{
+ // Stores: one per global declared above, each expected to carry the
+ // nontemporal annotation and the alignment of its type.
+ __builtin_nontemporal_store(true, &b1); // CHECK: store i8 1, i8* @b1, align 1, !nontemporal
+ __builtin_nontemporal_store(b1, &b2); // CHECK: store i8{{.*}}, align 1, !nontemporal
+ __builtin_nontemporal_store(1, &uc); // CHECK: store i8{{.*}}align 1, !nontemporal
+ __builtin_nontemporal_store(1, &sc); // CHECK: store i8{{.*}}align 1, !nontemporal
+ __builtin_nontemporal_store(1, &us); // CHECK: store i16{{.*}}align 2, !nontemporal
+ __builtin_nontemporal_store(1, &ss); // CHECK: store i16{{.*}}align 2, !nontemporal
+ __builtin_nontemporal_store(1, &ui); // CHECK: store i32{{.*}}align 4, !nontemporal
+ __builtin_nontemporal_store(1, &si); // CHECK: store i32{{.*}}align 4, !nontemporal
+ __builtin_nontemporal_store(1, &ull); // CHECK: store i64{{.*}}align 8, !nontemporal
+ __builtin_nontemporal_store(1, &sll); // CHECK: store i64{{.*}}align 8, !nontemporal
+ __builtin_nontemporal_store(1.0, &f1); // CHECK: store float{{.*}}align 4, !nontemporal
+ __builtin_nontemporal_store(1.0, &d1); // CHECK: store double{{.*}}align 8, !nontemporal
+ __builtin_nontemporal_store(vf1, &vf2); // CHECK: store <4 x float>{{.*}}align 16, !nontemporal
+ __builtin_nontemporal_store(vc1, &vc2); // CHECK: store <8 x i8>{{.*}}align 8, !nontemporal
+
+ // Loads: the same set of types, read back through the load builtin.
+ b1 = __builtin_nontemporal_load(&b2); // CHECK: load i8{{.*}}align 1, !nontemporal
+ uc = __builtin_nontemporal_load(&sc); // CHECK: load i8{{.*}}align 1, !nontemporal
+ sc = __builtin_nontemporal_load(&uc); // CHECK: load i8{{.*}}align 1, !nontemporal
+ us = __builtin_nontemporal_load(&ss); // CHECK: load i16{{.*}}align 2, !nontemporal
+ ss = __builtin_nontemporal_load(&us); // CHECK: load i16{{.*}}align 2, !nontemporal
+ ui = __builtin_nontemporal_load(&si); // CHECK: load i32{{.*}}align 4, !nontemporal
+ si = __builtin_nontemporal_load(&ui); // CHECK: load i32{{.*}}align 4, !nontemporal
+ ull = __builtin_nontemporal_load(&sll); // CHECK: load i64{{.*}}align 8, !nontemporal
+ sll = __builtin_nontemporal_load(&ull); // CHECK: load i64{{.*}}align 8, !nontemporal
+ f1 = __builtin_nontemporal_load(&f2); // CHECK: load float{{.*}}align 4, !nontemporal
+ d1 = __builtin_nontemporal_load(&d2); // CHECK: load double{{.*}}align 8, !nontemporal
+ vf2 = __builtin_nontemporal_load(&vf1); // CHECK: load <4 x float>{{.*}}align 16, !nontemporal
+ vc2 = __builtin_nontemporal_load(&vc1); // CHECK: load <8 x i8>{{.*}}align 8, !nontemporal
+}
More information about the cfe-commits
mailing list