[clang] 1e843a9 - [MS] Add more 128bit cmpxchg intrinsics for AArch64
Reid Kleckner via cfe-commits
cfe-commits at lists.llvm.org
Wed Nov 25 12:09:33 PST 2020
Author: Reid Kleckner
Date: 2020-11-25T12:07:28-08:00
New Revision: 1e843a987d847da48aaf41801b79cfb364937e8f
URL: https://github.com/llvm/llvm-project/commit/1e843a987d847da48aaf41801b79cfb364937e8f
DIFF: https://github.com/llvm/llvm-project/commit/1e843a987d847da48aaf41801b79cfb364937e8f.diff
LOG: [MS] Add more 128bit cmpxchg intrinsics for AArch64
The MSVC STL requires this on ARM64.
Requested in https://llvm.org/pr47099
Depends on D92061
Differential Revision: https://reviews.llvm.org/D92062
Added:
Modified:
clang/include/clang/Basic/BuiltinsAArch64.def
clang/lib/CodeGen/CGBuiltin.cpp
clang/lib/Headers/intrin.h
clang/test/CodeGen/ms-intrinsics.c
Removed:
################################################################################
diff --git a/clang/include/clang/Basic/BuiltinsAArch64.def b/clang/include/clang/Basic/BuiltinsAArch64.def
index f07c567053de..c684105908de 100644
--- a/clang/include/clang/Basic/BuiltinsAArch64.def
+++ b/clang/include/clang/Basic/BuiltinsAArch64.def
@@ -153,6 +153,11 @@ TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_acq, "LLiLLiD*LLiLLi", "nh",
TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_nf, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_InterlockedCompareExchange64_rel, "LLiLLiD*LLiLLi", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128, "UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128_acq,"UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128_nf ,"UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+TARGET_HEADER_BUILTIN(_InterlockedCompareExchange128_rel,"UcLLiD*LLiLLiLLi*", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
+
TARGET_HEADER_BUILTIN(_InterlockedOr8_acq, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_InterlockedOr8_nf, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_InterlockedOr8_rel, "ccD*c", "nh", "intrin.h", ALL_MS_LANGUAGES, "")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b2bfc3c84322..828d66f83de9 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -304,6 +304,10 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
AtomicOrdering::Monotonic :
SuccessOrdering;
+ // The atomic instruction is marked volatile for consistency with MSVC. This
+ // blocks the few atomics optimizations that LLVM has. If we want to optimize
+ // _Interlocked* operations in the future, we will have to remove the volatile
+ // marker.
auto *Result = CGF.Builder.CreateAtomicCmpXchg(
Destination, Comparand, Exchange,
SuccessOrdering, FailureOrdering);
@@ -311,6 +315,68 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
return CGF.Builder.CreateExtractValue(Result, 0);
}
+// 64-bit Microsoft platforms support 128 bit cmpxchg operations. They are
+// prototyped like this:
+//
+// unsigned char _InterlockedCompareExchange128...(
+// __int64 volatile * _Destination,
+// __int64 _ExchangeHigh,
+// __int64 _ExchangeLow,
+// __int64 * _ComparandResult);
+static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
+ const CallExpr *E,
+ AtomicOrdering SuccessOrdering) {
+ assert(E->getNumArgs() == 4);
+ llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
+ llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));
+
+ assert(Destination->getType()->isPointerTy());
+ assert(!ExchangeHigh->getType()->isPointerTy());
+ assert(!ExchangeLow->getType()->isPointerTy());
+ assert(ComparandPtr->getType()->isPointerTy());
+
+ // For Release ordering, the failure ordering should be Monotonic.
+ auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
+ ? AtomicOrdering::Monotonic
+ : SuccessOrdering;
+
+ // Convert to i128 pointers and values.
+ llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
+ llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
+ Destination = CGF.Builder.CreateBitCast(Destination, Int128PtrTy);
+ Address ComparandResult(CGF.Builder.CreateBitCast(ComparandPtr, Int128PtrTy),
+ CGF.getContext().toCharUnitsFromBits(128));
+
+ // (((i128)hi) << 64) | ((i128)lo)
+ ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
+ ExchangeLow = CGF.Builder.CreateZExt(ExchangeLow, Int128Ty);
+ ExchangeHigh =
+ CGF.Builder.CreateShl(ExchangeHigh, llvm::ConstantInt::get(Int128Ty, 64));
+ llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
+
+ // Load the comparand for the instruction.
+ llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);
+
+ auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ SuccessOrdering, FailureOrdering);
+
+ // The atomic instruction is marked volatile for consistency with MSVC. This
+ // blocks the few atomics optimizations that LLVM has. If we want to optimize
+ // _Interlocked* operations in the future, we will have to remove the volatile
+ // marker.
+ CXI->setVolatile(true);
+
+ // Store the result as an outparameter.
+ CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
+ ComparandResult);
+
+ // Get the success boolean and zero extend it to i8.
+ Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
+ return CGF.Builder.CreateZExt(Success, CGF.Int8Ty);
+}
+
static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
assert(E->getArg(0)->getType()->isPointerType());
@@ -993,6 +1059,10 @@ enum class CodeGenFunction::MSVCIntrin {
_InterlockedCompareExchange_acq,
_InterlockedCompareExchange_rel,
_InterlockedCompareExchange_nf,
+ _InterlockedCompareExchange128,
+ _InterlockedCompareExchange128_acq,
+ _InterlockedCompareExchange128_rel,
+ _InterlockedCompareExchange128_nf,
_InterlockedOr_acq,
_InterlockedOr_rel,
_InterlockedOr_nf,
@@ -1230,6 +1300,14 @@ translateAarch64ToMsvcIntrin(unsigned BuiltinID) {
case AArch64::BI_InterlockedCompareExchange_nf:
case AArch64::BI_InterlockedCompareExchange64_nf:
return MSVCIntrin::_InterlockedCompareExchange_nf;
+ case AArch64::BI_InterlockedCompareExchange128:
+ return MSVCIntrin::_InterlockedCompareExchange128;
+ case AArch64::BI_InterlockedCompareExchange128_acq:
+ return MSVCIntrin::_InterlockedCompareExchange128_acq;
+ case AArch64::BI_InterlockedCompareExchange128_nf:
+ return MSVCIntrin::_InterlockedCompareExchange128_nf;
+ case AArch64::BI_InterlockedCompareExchange128_rel:
+ return MSVCIntrin::_InterlockedCompareExchange128_rel;
case AArch64::BI_InterlockedOr8_acq:
case AArch64::BI_InterlockedOr16_acq:
case AArch64::BI_InterlockedOr_acq:
@@ -1317,6 +1395,8 @@ translateX86ToMsvcIntrin(unsigned BuiltinID) {
return MSVCIntrin::_BitScanReverse;
case clang::X86::BI_InterlockedAnd64:
return MSVCIntrin::_InterlockedAnd;
+ case clang::X86::BI_InterlockedCompareExchange128:
+ return MSVCIntrin::_InterlockedCompareExchange128;
case clang::X86::BI_InterlockedExchange64:
return MSVCIntrin::_InterlockedExchange;
case clang::X86::BI_InterlockedExchangeAdd64:
@@ -1423,6 +1503,15 @@ Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
case MSVCIntrin::_InterlockedCompareExchange_nf:
return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
+ case MSVCIntrin::_InterlockedCompareExchange128:
+ return EmitAtomicCmpXchg128ForMSIntrin(
+ *this, E, AtomicOrdering::SequentiallyConsistent);
+ case MSVCIntrin::_InterlockedCompareExchange128_acq:
+ return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Acquire);
+ case MSVCIntrin::_InterlockedCompareExchange128_rel:
+ return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Release);
+ case MSVCIntrin::_InterlockedCompareExchange128_nf:
+ return EmitAtomicCmpXchg128ForMSIntrin(*this, E, AtomicOrdering::Monotonic);
case MSVCIntrin::_InterlockedOr_acq:
return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
AtomicOrdering::Acquire);
@@ -14032,42 +14121,6 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
llvm::SyncScope::SingleThread);
}
- case X86::BI_InterlockedCompareExchange128: {
- // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
- // instead it takes pointers to 64bit ints for Destination and
- // ComparandResult, and exchange is taken as two 64bit ints (high & low).
- // The previous value is written to ComparandResult, and success is
- // returned.
-
- llvm::Type *Int128Ty = Builder.getInt128Ty();
- llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
-
- Value *Destination =
- Builder.CreateBitCast(Ops[0], Int128PtrTy);
- Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
- Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
- Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
- getContext().toCharUnitsFromBits(128));
-
- Value *Exchange = Builder.CreateOr(
- Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
- ExchangeLow128);
-
- Value *Comparand = Builder.CreateLoad(ComparandResult);
-
- AtomicCmpXchgInst *CXI =
- Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
- AtomicOrdering::SequentiallyConsistent,
- AtomicOrdering::SequentiallyConsistent);
- CXI->setVolatile(true);
-
- // Write the result back to the inout pointer.
- Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
-
- // Get the success boolean and zero extend it to i8.
- Value *Success = Builder.CreateExtractValue(CXI, 1);
- return Builder.CreateZExt(Success, ConvertType(E->getType()));
- }
case X86::BI_AddressOfReturnAddress: {
Function *F =
diff --git a/clang/lib/Headers/intrin.h b/clang/lib/Headers/intrin.h
index e7b76a3bb2ed..a78b96997d18 100644
--- a/clang/lib/Headers/intrin.h
+++ b/clang/lib/Headers/intrin.h
@@ -214,10 +214,6 @@ unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
long _Comparand);
-unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
- __int64 _ExchangeHigh,
- __int64 _ExchangeLow,
- __int64 *_CompareandResult);
unsigned char _InterlockedCompareExchange128_np(__int64 volatile *_Destination,
__int64 _ExchangeHigh,
__int64 _ExchangeLow,
@@ -427,6 +423,26 @@ __int64 _InterlockedCompareExchange64_nf(__int64 volatile *_Destination,
__int64 _InterlockedCompareExchange64_rel(__int64 volatile *_Destination,
__int64 _Exchange, __int64 _Comparand);
#endif
+#if defined(__x86_64__) || defined(__aarch64__)
+unsigned char _InterlockedCompareExchange128(__int64 volatile *_Destination,
+ __int64 _ExchangeHigh,
+ __int64 _ExchangeLow,
+ __int64 *_ComparandResult);
+#endif
+#if defined(__aarch64__)
+unsigned char _InterlockedCompareExchange128_acq(__int64 volatile *_Destination,
+ __int64 _ExchangeHigh,
+ __int64 _ExchangeLow,
+ __int64 *_ComparandResult);
+unsigned char _InterlockedCompareExchange128_nf(__int64 volatile *_Destination,
+ __int64 _ExchangeHigh,
+ __int64 _ExchangeLow,
+ __int64 *_ComparandResult);
+unsigned char _InterlockedCompareExchange128_rel(__int64 volatile *_Destination,
+ __int64 _ExchangeHigh,
+ __int64 _ExchangeLow,
+ __int64 *_ComparandResult);
+#endif
/*----------------------------------------------------------------------------*\
|* movs, stos
diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c
index 14e591e92a0e..90a97b5cb046 100644
--- a/clang/test/CodeGen/ms-intrinsics.c
+++ b/clang/test/CodeGen/ms-intrinsics.c
@@ -6,10 +6,10 @@
// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-ARM,CHECK-ARM-ARM64,CHECK-ARM-X64
// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
// RUN: -triple x86_64--windows -Oz -emit-llvm -target-feature +cx16 %s -o - \
-// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-X64,CHECK-ARM-X64,CHECK-INTEL
+// RUN: | FileCheck %s --check-prefixes CHECK,CHECK-X64,CHECK-ARM-X64,CHECK-INTEL,CHECK-64
// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
// RUN: -triple aarch64-windows -Oz -emit-llvm %s -o - \
-// RUN: | FileCheck %s --check-prefixes CHECK-ARM-ARM64,CHECK-ARM-X64,CHECK-ARM64
+// RUN: | FileCheck %s --check-prefixes CHECK-ARM-ARM64,CHECK-ARM-X64,CHECK-ARM64,CHECK-64
// intrin.h needs size_t, but -ffreestanding prevents us from getting it from
// stddef.h. Work around it with this typedef.
@@ -432,32 +432,59 @@ __int64 test_InterlockedCompareExchange64(__int64 volatile *Destination, __int64
// CHECK: ret i64 [[RESULT]]
// CHECK: }
-#if defined(__x86_64__)
+#if defined(__x86_64__) || defined(__aarch64__)
unsigned char test_InterlockedCompareExchange128(
__int64 volatile *Destination, __int64 ExchangeHigh,
__int64 ExchangeLow, __int64 *ComparandResult) {
return _InterlockedCompareExchange128(++Destination, ++ExchangeHigh,
++ExchangeLow, ++ComparandResult);
}
-// CHECK-X64: define{{.*}}i8 @test_InterlockedCompareExchange128(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%ExchangeHigh, i64{{[a-z_ ]*}}%ExchangeLow, i64*{{[a-z_ ]*}}%ComparandResult){{.*}}{
-// CHECK-X64: %incdec.ptr = getelementptr inbounds i64, i64* %Destination, i64 1
-// CHECK-X64: %inc = add nsw i64 %ExchangeHigh, 1
-// CHECK-X64: %inc1 = add nsw i64 %ExchangeLow, 1
-// CHECK-X64: %incdec.ptr2 = getelementptr inbounds i64, i64* %ComparandResult, i64 1
-// CHECK-X64: [[DST:%[0-9]+]] = bitcast i64* %incdec.ptr to i128*
-// CHECK-X64: [[EH:%[0-9]+]] = zext i64 %inc to i128
-// CHECK-X64: [[EL:%[0-9]+]] = zext i64 %inc1 to i128
-// CHECK-X64: [[CNR:%[0-9]+]] = bitcast i64* %incdec.ptr2 to i128*
-// CHECK-X64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
-// CHECK-X64: [[EXP:%[0-9]+]] = or i128 [[EHS]], [[EL]]
-// CHECK-X64: [[ORG:%[0-9]+]] = load i128, i128* [[CNR]], align 16
-// CHECK-X64: [[RES:%[0-9]+]] = cmpxchg volatile i128* [[DST]], i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst
-// CHECK-X64: [[OLD:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 0
-// CHECK-X64: store i128 [[OLD]], i128* [[CNR]], align 16
-// CHECK-X64: [[SUC1:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 1
-// CHECK-X64: [[SUC8:%[0-9]+]] = zext i1 [[SUC1]] to i8
-// CHECK-X64: ret i8 [[SUC8]]
-// CHECK-X64: }
+// CHECK-64: define{{.*}}i8 @test_InterlockedCompareExchange128(i64*{{[a-z_ ]*}}%Destination, i64{{[a-z_ ]*}}%ExchangeHigh, i64{{[a-z_ ]*}}%ExchangeLow, i64*{{[a-z_ ]*}}%ComparandResult){{.*}}{
+// CHECK-64: %incdec.ptr = getelementptr inbounds i64, i64* %Destination, i64 1
+// CHECK-64: %inc = add nsw i64 %ExchangeHigh, 1
+// CHECK-64: %inc1 = add nsw i64 %ExchangeLow, 1
+// CHECK-64: %incdec.ptr2 = getelementptr inbounds i64, i64* %ComparandResult, i64 1
+// CHECK-64: [[DST:%[0-9]+]] = bitcast i64* %incdec.ptr to i128*
+// CHECK-64: [[CNR:%[0-9]+]] = bitcast i64* %incdec.ptr2 to i128*
+// CHECK-64: [[EH:%[0-9]+]] = zext i64 %inc to i128
+// CHECK-64: [[EL:%[0-9]+]] = zext i64 %inc1 to i128
+// CHECK-64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
+// CHECK-64: [[EXP:%[0-9]+]] = or i128 [[EHS]], [[EL]]
+// CHECK-64: [[ORG:%[0-9]+]] = load i128, i128* [[CNR]], align 16
+// CHECK-64: [[RES:%[0-9]+]] = cmpxchg volatile i128* [[DST]], i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst
+// CHECK-64: [[OLD:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 0
+// CHECK-64: store i128 [[OLD]], i128* [[CNR]], align 16
+// CHECK-64: [[SUC1:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 1
+// CHECK-64: [[SUC8:%[0-9]+]] = zext i1 [[SUC1]] to i8
+// CHECK-64: ret i8 [[SUC8]]
+// CHECK-64: }
+#endif
+
+#if defined(__aarch64__)
+unsigned char test_InterlockedCompareExchange128_acq(
+ __int64 volatile *Destination, __int64 ExchangeHigh,
+ __int64 ExchangeLow, __int64 *ComparandResult) {
+ return _InterlockedCompareExchange128_acq(Destination, ExchangeHigh,
+ ExchangeLow, ComparandResult);
+}
+unsigned char test_InterlockedCompareExchange128_nf(
+ __int64 volatile *Destination, __int64 ExchangeHigh,
+ __int64 ExchangeLow, __int64 *ComparandResult) {
+ return _InterlockedCompareExchange128_nf(Destination, ExchangeHigh,
+ ExchangeLow, ComparandResult);
+}
+unsigned char test_InterlockedCompareExchange128_rel(
+ __int64 volatile *Destination, __int64 ExchangeHigh,
+ __int64 ExchangeLow, __int64 *ComparandResult) {
+ return _InterlockedCompareExchange128_rel(Destination, ExchangeHigh,
+ ExchangeLow, ComparandResult);
+}
+// CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_acq({{.*}})
+// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} acquire acquire
+// CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_nf({{.*}})
+// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} monotonic monotonic
+// CHECK-ARM64: define{{.*}}i8 @test_InterlockedCompareExchange128_rel({{.*}})
+// CHECK-ARM64: cmpxchg volatile i128* %{{.*}}, i128 %{{.*}}, i128 %{{.*}} release monotonic
#endif
short test_InterlockedIncrement16(short volatile *Addend) {
More information about the cfe-commits
mailing list