[clang] Use Address for CGBuilder's CreateAtomicRMW and CreateAtomicCmpXchg. (PR #74349)
James Y Knight via cfe-commits
cfe-commits at lists.llvm.org
Mon Dec 4 09:40:43 PST 2023
https://github.com/jyknight created https://github.com/llvm/llvm-project/pull/74349
Update all callers to pass through the Address.
For the older builtins such as `__sync_*` and the MSVC `_Interlocked*` family, natural alignment of the atomic access is _assumed_; this change preserves that behavior. Greater-than-required alignments, however, are now passed through to the emitted instruction.
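For illustration, a minimal sketch of both cases (the declarations and function names below are illustrative only, not part of the patch; they mirror the new ms-intrinsics-underaligned.c test and the updated atomic-ops.c checks):

```c
/* Sketch only: shows the assumed natural alignment for an under-aligned
   __sync_* operand, and the pass-through of a larger-than-required alignment. */
#pragma pack(push, 1)
struct Packed {
  char pad;
  int counter;               /* only 1-byte aligned inside the packed struct */
};
#pragma pack(pop)

int bump(struct Packed *p) {
  /* Natural 4-byte alignment is assumed: emitted as
     "atomicrmw add ptr ... align 4" (with a misaligned-operand warning),
     not the 1-byte alignment clang derives for this field. */
  return __sync_add_and_fetch(&p->counter, 1);
}

_Alignas(16) long long big;

long long swap_big(long long v) {
  long long old;
  /* Greater-than-required alignment is passed through: emitted as
     "atomicrmw xchg ptr ... align 16" rather than being clamped to 8. */
  __atomic_exchange(&big, &v, &old, __ATOMIC_SEQ_CST);
  return old;
}
```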
From 7fd7ebbf60beacb63ddfff16a7c4405e80cb62b4 Mon Sep 17 00:00:00 2001
From: James Y Knight <jyknight at google.com>
Date: Mon, 4 Dec 2023 12:11:58 -0500
Subject: [PATCH] Use Address for CGBuilder's CreateAtomicRMW and
CreateAtomicCmpXchg.
Update all callers to pass through the Address.
For the older builtins such as `__sync_*` and MSVC `_Interlocked*`,
natural alignment of the atomic access is _assumed_. This change
preserves that behavior.
---
clang/lib/CodeGen/CGAtomic.cpp | 8 +-
clang/lib/CodeGen/CGBuilder.h | 17 +--
clang/lib/CodeGen/CGBuiltin.cpp | 130 +++++++++---------
clang/lib/CodeGen/CGExprScalar.cpp | 6 +-
clang/lib/CodeGen/CGStmtOpenMP.cpp | 2 +-
clang/test/CodeGen/atomic-ops.c | 4 +-
.../test/CodeGen/ms-intrinsics-underaligned.c | 110 +++++++++++++++
clang/test/CodeGen/ms-intrinsics.c | 4 +-
.../OpenMP/parallel_reduction_codegen.cpp | 6 +-
9 files changed, 195 insertions(+), 92 deletions(-)
create mode 100644 clang/test/CodeGen/ms-intrinsics-underaligned.c
diff --git a/clang/lib/CodeGen/CGAtomic.cpp b/clang/lib/CodeGen/CGAtomic.cpp
index 6005d5c51c0e1..379c833af32a2 100644
--- a/clang/lib/CodeGen/CGAtomic.cpp
+++ b/clang/lib/CodeGen/CGAtomic.cpp
@@ -383,8 +383,7 @@ static void emitAtomicCmpXchg(CodeGenFunction &CGF, AtomicExpr *E, bool IsWeak,
llvm::Value *Desired = CGF.Builder.CreateLoad(Val2);
llvm::AtomicCmpXchgInst *Pair = CGF.Builder.CreateAtomicCmpXchg(
- Ptr.getPointer(), Expected, Desired, SuccessOrder, FailureOrder,
- Scope);
+ Ptr, Expected, Desired, SuccessOrder, FailureOrder, Scope);
Pair->setVolatile(E->isVolatile());
Pair->setWeak(IsWeak);
@@ -699,7 +698,7 @@ static void EmitAtomicOp(CodeGenFunction &CGF, AtomicExpr *E, Address Dest,
llvm::Value *LoadVal1 = CGF.Builder.CreateLoad(Val1);
llvm::AtomicRMWInst *RMWI =
- CGF.Builder.CreateAtomicRMW(Op, Ptr.getPointer(), LoadVal1, Order, Scope);
+ CGF.Builder.CreateAtomicRMW(Op, Ptr, LoadVal1, Order, Scope);
RMWI->setVolatile(E->isVolatile());
// For __atomic_*_fetch operations, perform the operation again to
@@ -1740,8 +1739,7 @@ std::pair<llvm::Value *, llvm::Value *> AtomicInfo::EmitAtomicCompareExchangeOp(
llvm::AtomicOrdering Success, llvm::AtomicOrdering Failure, bool IsWeak) {
// Do the atomic store.
Address Addr = getAtomicAddressAsAtomicIntPointer();
- auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr.getPointer(),
- ExpectedVal, DesiredVal,
+ auto *Inst = CGF.Builder.CreateAtomicCmpXchg(Addr, ExpectedVal, DesiredVal,
Success, Failure);
// Other decoration.
Inst->setVolatile(LVal.isVolatileQualified());
diff --git a/clang/lib/CodeGen/CGBuilder.h b/clang/lib/CodeGen/CGBuilder.h
index 68535920088c4..bf5ab171d720d 100644
--- a/clang/lib/CodeGen/CGBuilder.h
+++ b/clang/lib/CodeGen/CGBuilder.h
@@ -126,25 +126,22 @@ class CGBuilderTy : public CGBuilderBaseTy {
return CreateAlignedStore(getInt1(Value), Addr, CharUnits::One());
}
- // Temporarily use old signature; clang will be updated to an Address overload
- // in a subsequent patch.
llvm::AtomicCmpXchgInst *
- CreateAtomicCmpXchg(llvm::Value *Ptr, llvm::Value *Cmp, llvm::Value *New,
+ CreateAtomicCmpXchg(Address Addr, llvm::Value *Cmp, llvm::Value *New,
llvm::AtomicOrdering SuccessOrdering,
llvm::AtomicOrdering FailureOrdering,
llvm::SyncScope::ID SSID = llvm::SyncScope::System) {
return CGBuilderBaseTy::CreateAtomicCmpXchg(
- Ptr, Cmp, New, llvm::MaybeAlign(), SuccessOrdering, FailureOrdering,
- SSID);
+ Addr.getPointer(), Cmp, New, Addr.getAlignment().getAsAlign(),
+ SuccessOrdering, FailureOrdering, SSID);
}
- // Temporarily use old signature; clang will be updated to an Address overload
- // in a subsequent patch.
llvm::AtomicRMWInst *
- CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, llvm::Value *Ptr,
- llvm::Value *Val, llvm::AtomicOrdering Ordering,
+ CreateAtomicRMW(llvm::AtomicRMWInst::BinOp Op, Address Addr, llvm::Value *Val,
+ llvm::AtomicOrdering Ordering,
llvm::SyncScope::ID SSID = llvm::SyncScope::System) {
- return CGBuilderBaseTy::CreateAtomicRMW(Op, Ptr, Val, llvm::MaybeAlign(),
+ return CGBuilderBaseTy::CreateAtomicRMW(Op, Addr.getPointer(), Val,
+ Addr.getAlignment().getAsAlign(),
Ordering, SSID);
}
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 65d9862621061..14e20bf16811c 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -188,8 +188,8 @@ static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
return V;
}
-static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF,
- const CallExpr *E) {
+static Address CheckAtomicAlignment(CodeGenFunction &CGF,
+ const CallExpr *E) {
ASTContext &Ctx = CGF.getContext();
Address Ptr = CGF.EmitPointerWithAlignment(E->getArg(0));
unsigned Bytes = Ptr.getElementType()->isPointerTy()
@@ -199,8 +199,10 @@ static llvm::Value *CheckAtomicAlignment(CodeGenFunction &CGF,
if (Align % Bytes != 0) {
DiagnosticsEngine &Diags = CGF.CGM.getDiags();
Diags.Report(E->getBeginLoc(), diag::warn_sync_op_misaligned);
+ // Force address to be at least naturally-aligned.
+ return Ptr.withAlignment(CharUnits::fromQuantity(Bytes));
}
- return Ptr.getPointer();
+ return Ptr;
}
/// Utility to insert an atomic instruction based on Intrinsic::ID
@@ -215,19 +217,17 @@ static Value *MakeBinaryAtomicValue(
E->getArg(0)->getType()->getPointeeType()));
assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
- llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
llvm::IntegerType *IntType = llvm::IntegerType::get(
CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
- llvm::Value *Args[2];
- Args[0] = DestPtr;
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
+ llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Type *ValueType = Val->getType();
+ Val = EmitToInt(CGF, Val, T, IntType);
- llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], Ordering);
+ llvm::Value *Result =
+ CGF.Builder.CreateAtomicRMW(Kind, DestAddr, Val, Ordering);
return EmitFromInt(CGF, Result, T, ValueType);
}
@@ -270,20 +270,18 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
E->getArg(0)->getType()->getPointeeType()));
assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
- llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
llvm::IntegerType *IntType = llvm::IntegerType::get(
CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
- llvm::Value *Args[2];
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- Args[0] = DestPtr;
+ llvm::Value *Val = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Type *ValueType = Val->getType();
+ Val = EmitToInt(CGF, Val, T, IntType);
llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
- Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
- Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
+ Kind, DestAddr, Val, llvm::AtomicOrdering::SequentiallyConsistent);
+ Result = CGF.Builder.CreateBinOp(Op, Result, Val);
if (Invert)
Result =
CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
@@ -309,20 +307,18 @@ static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
bool ReturnBool) {
QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
- llvm::Value *DestPtr = CheckAtomicAlignment(CGF, E);
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
llvm::IntegerType *IntType = llvm::IntegerType::get(
CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
- Value *Args[3];
- Args[0] = DestPtr;
- Args[1] = CGF.EmitScalarExpr(E->getArg(1));
- llvm::Type *ValueType = Args[1]->getType();
- Args[1] = EmitToInt(CGF, Args[1], T, IntType);
- Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
+ Value *Cmp = CGF.EmitScalarExpr(E->getArg(1));
+ llvm::Type *ValueType = Cmp->getType();
+ Cmp = EmitToInt(CGF, Cmp, T, IntType);
+ Value *New = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
- Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
+ DestAddr, Cmp, New, llvm::AtomicOrdering::SequentiallyConsistent,
llvm::AtomicOrdering::SequentiallyConsistent);
if (ReturnBool)
// Extract boolean success flag and zext it to int.
@@ -358,7 +354,8 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
E->getArg(2)->getType()));
- auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
+
auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
@@ -372,8 +369,7 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
// _Interlocked* operations in the future, we will have to remove the volatile
// marker.
auto *Result = CGF.Builder.CreateAtomicCmpXchg(
- Destination, Comparand, Exchange,
- SuccessOrdering, FailureOrdering);
+ DestAddr, Comparand, Exchange, SuccessOrdering, FailureOrdering);
Result->setVolatile(true);
return CGF.Builder.CreateExtractValue(Result, 0);
}
@@ -386,29 +382,34 @@ Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
// __int64 _ExchangeHigh,
// __int64 _ExchangeLow,
// __int64 * _ComparandResult);
+//
+// Note that Destination is assumed to be at least 16-byte aligned, despite
+// being typed int64.
+
static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
const CallExpr *E,
AtomicOrdering SuccessOrdering) {
assert(E->getNumArgs() == 4);
- llvm::Value *Destination = CGF.EmitScalarExpr(E->getArg(0));
+ llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
llvm::Value *ExchangeHigh = CGF.EmitScalarExpr(E->getArg(1));
llvm::Value *ExchangeLow = CGF.EmitScalarExpr(E->getArg(2));
- llvm::Value *ComparandPtr = CGF.EmitScalarExpr(E->getArg(3));
+ Address ComparandAddr = CGF.EmitPointerWithAlignment(E->getArg(3));
- assert(Destination->getType()->isPointerTy());
+ assert(DestPtr->getType()->isPointerTy());
assert(!ExchangeHigh->getType()->isPointerTy());
assert(!ExchangeLow->getType()->isPointerTy());
- assert(ComparandPtr->getType()->isPointerTy());
// For Release ordering, the failure ordering should be Monotonic.
auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release
? AtomicOrdering::Monotonic
: SuccessOrdering;
- // Convert to i128 pointers and values.
+ // Convert to i128 pointers and values. Alignment is also overridden for
+ // destination pointer.
llvm::Type *Int128Ty = llvm::IntegerType::get(CGF.getLLVMContext(), 128);
- Address ComparandResult(ComparandPtr, Int128Ty,
- CGF.getContext().toCharUnitsFromBits(128));
+ Address DestAddr(DestPtr, Int128Ty,
+ CGF.getContext().toCharUnitsFromBits(128));
+ ComparandAddr = ComparandAddr.withElementType(Int128Ty);
// (((i128)hi) << 64) | ((i128)lo)
ExchangeHigh = CGF.Builder.CreateZExt(ExchangeHigh, Int128Ty);
@@ -418,9 +419,9 @@ static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
llvm::Value *Exchange = CGF.Builder.CreateOr(ExchangeHigh, ExchangeLow);
// Load the comparand for the instruction.
- llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandResult);
+ llvm::Value *Comparand = CGF.Builder.CreateLoad(ComparandAddr);
- auto *CXI = CGF.Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ auto *CXI = CGF.Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
SuccessOrdering, FailureOrdering);
// The atomic instruction is marked volatile for consistency with MSVC. This
@@ -431,7 +432,7 @@ static Value *EmitAtomicCmpXchg128ForMSIntrin(CodeGenFunction &CGF,
// Store the result as an outparameter.
CGF.Builder.CreateStore(CGF.Builder.CreateExtractValue(CXI, 0),
- ComparandResult);
+ ComparandAddr);
// Get the success boolean and zero extend it to i8.
Value *Success = CGF.Builder.CreateExtractValue(CXI, 1);
@@ -443,24 +444,21 @@ static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
assert(E->getArg(0)->getType()->isPointerType());
auto *IntTy = CGF.ConvertType(E->getType());
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
auto *Result = CGF.Builder.CreateAtomicRMW(
- AtomicRMWInst::Add,
- CGF.EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- Ordering);
+ AtomicRMWInst::Add, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
}
-static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
+static Value *EmitAtomicDecrementValue(
+ CodeGenFunction &CGF, const CallExpr *E,
AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
assert(E->getArg(0)->getType()->isPointerType());
auto *IntTy = CGF.ConvertType(E->getType());
+ Address DestAddr = CheckAtomicAlignment(CGF, E);
auto *Result = CGF.Builder.CreateAtomicRMW(
- AtomicRMWInst::Sub,
- CGF.EmitScalarExpr(E->getArg(0)),
- ConstantInt::get(IntTy, 1),
- Ordering);
+ AtomicRMWInst::Sub, DestAddr, ConstantInt::get(IntTy, 1), Ordering);
return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
}
@@ -1215,8 +1213,7 @@ static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
Mask = CGF.Builder.CreateNot(Mask);
RMWOp = llvm::AtomicRMWInst::And;
}
- OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
- Ordering);
+ OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr, Mask, Ordering);
} else {
// Emit a plain load for the non-interlocked intrinsics.
OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
@@ -4456,14 +4453,13 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
case Builtin::BI__sync_lock_release_4:
case Builtin::BI__sync_lock_release_8:
case Builtin::BI__sync_lock_release_16: {
- Value *Ptr = CheckAtomicAlignment(*this, E);
+ Address Ptr = CheckAtomicAlignment(*this, E);
QualType ElTy = E->getArg(0)->getType()->getPointeeType();
- CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
+
llvm::Type *ITy =
- llvm::IntegerType::get(getLLVMContext(), StoreSize.getQuantity() * 8);
+ llvm::IntegerType::get(getLLVMContext(), getContext().getTypeSize(ElTy));
llvm::StoreInst *Store =
- Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
- StoreSize);
+ Builder.CreateStore(llvm::Constant::getNullValue(ITy), Ptr);
Store->setAtomic(llvm::AtomicOrdering::Release);
return RValue::get(nullptr);
}
@@ -4514,7 +4510,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
bool Volatile =
PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
- Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Address Ptr = EmitPointerWithAlignment(E->getArg(0)).withElementType(Int8Ty);
+
Value *NewVal = Builder.getInt8(1);
Value *Order = EmitScalarExpr(E->getArg(1));
if (isa<llvm::ConstantInt>(Order)) {
@@ -5035,7 +5032,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
llvm::IntegerType *IntType = IntegerType::get(
getLLVMContext(), getContext().getTypeSize(E->getType()));
- llvm::Value *Destination = EmitScalarExpr(E->getArg(0));
+ Address DestAddr = CheckAtomicAlignment(*this, E);
llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
RTy = Exchange->getType();
@@ -5048,7 +5045,7 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
- auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
+ auto Result = Builder.CreateAtomicCmpXchg(DestAddr, Comparand, Exchange,
Ordering, Ordering);
Result->setVolatile(true);
@@ -11901,12 +11898,12 @@ Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
}
case clang::AArch64::BI_InterlockedAdd: {
- Value *Arg0 = EmitScalarExpr(E->getArg(0));
- Value *Arg1 = EmitScalarExpr(E->getArg(1));
+ Address DestAddr = CheckAtomicAlignment(*this, E);
+ Value *Val = EmitScalarExpr(E->getArg(1));
AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
- AtomicRMWInst::Add, Arg0, Arg1,
+ AtomicRMWInst::Add, DestAddr, Val,
llvm::AtomicOrdering::SequentiallyConsistent);
- return Builder.CreateAdd(RMWI, Arg1);
+ return Builder.CreateAdd(RMWI, Val);
}
}
@@ -18219,7 +18216,7 @@ Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
break;
}
- Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Address Ptr = CheckAtomicAlignment(*this, E);
Value *Val = EmitScalarExpr(E->getArg(1));
ProcessOrderScopeAMDGCN(EmitScalarExpr(E->getArg(2)),
@@ -19082,9 +19079,10 @@ Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
case NVPTX::BI__nvvm_atom_add_gen_f:
case NVPTX::BI__nvvm_atom_add_gen_d: {
- Value *Ptr = EmitScalarExpr(E->getArg(0));
+ Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
Value *Val = EmitScalarExpr(E->getArg(1));
- return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
+
+ return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, DestAddr, Val,
AtomicOrdering::SequentiallyConsistent);
}
diff --git a/clang/lib/CodeGen/CGExprScalar.cpp b/clang/lib/CodeGen/CGExprScalar.cpp
index 378437364767f..41ad2ddac30d2 100644
--- a/clang/lib/CodeGen/CGExprScalar.cpp
+++ b/clang/lib/CodeGen/CGExprScalar.cpp
@@ -2571,7 +2571,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
// For atomic bool increment, we just store true and return it for
// preincrement, do an atomic swap with true for postincrement
return Builder.CreateAtomicRMW(
- llvm::AtomicRMWInst::Xchg, LV.getPointer(CGF), True,
+ llvm::AtomicRMWInst::Xchg, LV.getAddress(CGF), True,
llvm::AtomicOrdering::SequentiallyConsistent);
}
// Special case for atomic increment / decrement on integers, emit
@@ -2589,7 +2589,7 @@ ScalarExprEmitter::EmitScalarPrePostIncDec(const UnaryOperator *E, LValue LV,
llvm::Value *amt = CGF.EmitToMemory(
llvm::ConstantInt::get(ConvertType(type), 1, true), type);
llvm::Value *old =
- Builder.CreateAtomicRMW(aop, LV.getPointer(CGF), amt,
+ Builder.CreateAtomicRMW(aop, LV.getAddress(CGF), amt,
llvm::AtomicOrdering::SequentiallyConsistent);
return isPre ? Builder.CreateBinOp(op, old, amt) : old;
}
@@ -3314,7 +3314,7 @@ LValue ScalarExprEmitter::EmitCompoundAssignLValue(
E->getExprLoc()),
LHSTy);
Value *OldVal = Builder.CreateAtomicRMW(
- AtomicOp, LHSLV.getPointer(CGF), Amt,
+ AtomicOp, LHSLV.getAddress(CGF), Amt,
llvm::AtomicOrdering::SequentiallyConsistent);
// Since operation is atomic, the result type is guaranteed to be the
diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp
index 90c7ed450e54b..842a45ff0ca14 100644
--- a/clang/lib/CodeGen/CGStmtOpenMP.cpp
+++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp
@@ -6206,7 +6206,7 @@ static std::pair<bool, RValue> emitOMPAtomicRMW(CodeGenFunction &CGF, LValue X,
X.getAddress(CGF).getElementType());
}
llvm::Value *Res =
- CGF.Builder.CreateAtomicRMW(RMWOp, X.getPointer(CGF), UpdateVal, AO);
+ CGF.Builder.CreateAtomicRMW(RMWOp, X.getAddress(CGF), UpdateVal, AO);
return std::make_pair(true, RValue::get(Res));
}
diff --git a/clang/test/CodeGen/atomic-ops.c b/clang/test/CodeGen/atomic-ops.c
index 1295786524a0d..9ac05d270b97c 100644
--- a/clang/test/CodeGen/atomic-ops.c
+++ b/clang/test/CodeGen/atomic-ops.c
@@ -697,9 +697,9 @@ void test_underaligned(void) {
__atomic_load(&aligned_a, &aligned_b, memory_order_seq_cst);
// CHECK: store atomic i64 {{.*}}, align 16
__atomic_store(&aligned_a, &aligned_b, memory_order_seq_cst);
- // CHECK: atomicrmw xchg ptr {{.*}}, align 8
+ // CHECK: atomicrmw xchg ptr {{.*}}, align 16
__atomic_exchange(&aligned_a, &aligned_b, &aligned_c, memory_order_seq_cst);
- // CHECK: cmpxchg weak ptr {{.*}}, align 8
+ // CHECK: cmpxchg weak ptr {{.*}}, align 16
__atomic_compare_exchange(&aligned_a, &aligned_b, &aligned_c, 1, memory_order_seq_cst, memory_order_seq_cst);
}
diff --git a/clang/test/CodeGen/ms-intrinsics-underaligned.c b/clang/test/CodeGen/ms-intrinsics-underaligned.c
new file mode 100644
index 0000000000000..e1f0d2cba8e25
--- /dev/null
+++ b/clang/test/CodeGen/ms-intrinsics-underaligned.c
@@ -0,0 +1,110 @@
+// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
+// RUN: -triple x86_64--windows -Oz -emit-llvm -target-feature +cx16 %s -o - \
+// RUN: | FileCheck %s --check-prefixes=CHECK
+
+// RUN: %clang_cc1 -ffreestanding -fms-extensions -fms-compatibility -fms-compatibility-version=17.00 \
+// RUN: -triple aarch64--windows -Oz -emit-llvm %s -o - \
+// RUN: | FileCheck %s --check-prefixes=CHECK,CHECK-AARCH64
+
+// Ensure that we emit _Interlocked atomic operations specifying natural
+// alignment, even when clang's usual alignment derivation would result in a
+// lower alignment value.
+
+// intrin.h needs size_t, but -ffreestanding prevents us from getting it from
+// stddef.h. Work around it with this typedef.
+typedef __SIZE_TYPE__ size_t;
+
+#include <intrin.h>
+
+#pragma pack(1)
+typedef struct {
+ char a;
+ short b;
+ long c;
+ long long d;
+ void *p;
+} X;
+
+_Static_assert(sizeof(X) == 23, "");
+_Static_assert(__alignof__(X) == 1, "");
+
+// CHECK-LABEL: @test_InterlockedExchangePointer(
+// CHECK: atomicrmw {{.*}} align 8
+void *test_InterlockedExchangePointer(X *x) {
+ return _InterlockedExchangePointer(&x->p, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedExchange8(
+// CHECK: atomicrmw {{.*}} align 1
+char test_InterlockedExchange8(X *x) {
+ return _InterlockedExchange8(&x->a, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedExchange16(
+// CHECK: atomicrmw {{.*}} align 2
+short test_InterlockedExchange16(X *x) {
+ return _InterlockedExchange16(&x->b, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedExchange(
+// CHECK: atomicrmw {{.*}} align 4
+long test_InterlockedExchange(X *x) {
+ return _InterlockedExchange(&x->c, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedExchange64(
+// CHECK: atomicrmw {{.*}} align 8
+long long test_InterlockedExchange64(X *x) {
+ return _InterlockedExchange64(&x->d, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedIncrement(
+// CHECK: atomicrmw {{.*}} align 4
+long test_InterlockedIncrement(X *x) {
+ return _InterlockedIncrement(&x->c);
+}
+
+// CHECK-LABEL: @test_InterlockedDecrement16(
+// CHECK: atomicrmw {{.*}} align 2
+short test_InterlockedDecrement16(X *x) {
+ return _InterlockedDecrement16(&x->b);
+}
+
+
+// CHECK-LABEL: @test_InterlockedCompareExchangePointer(
+// CHECK: cmpxchg {{.*}} align 8
+void *test_InterlockedCompareExchangePointer(X *x) {
+ return _InterlockedCompareExchangePointer(&x->p, 0, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedCompareExchange8(
+// CHECK: cmpxchg {{.*}} align 1
+char test_InterlockedCompareExchange8(X *x) {
+ return _InterlockedCompareExchange8(&x->a, 0, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedCompareExchange16(
+// CHECK: cmpxchg {{.*}} align 2
+short test_InterlockedCompareExchange16(X *x) {
+ return _InterlockedCompareExchange16(&x->b, 0, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedCompareExchange(
+// CHECK: cmpxchg {{.*}} align 4
+long test_InterlockedCompareExchange(X *x) {
+ return _InterlockedCompareExchange(&x->c, 0, 0);
+}
+
+// CHECK-LABEL: @test_InterlockedCompareExchange64(
+// CHECK: cmpxchg {{.*}} align 8
+long long test_InterlockedCompareExchange64(X *x) {
+ return _InterlockedCompareExchange64(&x->d, 0, 0);
+}
+
+#ifdef __aarch64__
+// CHECK-AARCH64-LABEL: @test_InterlockedAdd(
+// CHECK-AARCH64: atomicrmw {{.*}} align 4
+long test_InterlockedAdd(X *x) {
+ return _InterlockedAdd(&x->c, 4);
+}
+#endif
diff --git a/clang/test/CodeGen/ms-intrinsics.c b/clang/test/CodeGen/ms-intrinsics.c
index debc84404aed1..ffab8b998d8bd 100644
--- a/clang/test/CodeGen/ms-intrinsics.c
+++ b/clang/test/CodeGen/ms-intrinsics.c
@@ -445,10 +445,10 @@ unsigned char test_InterlockedCompareExchange128(
// CHECK-64: [[EL:%[0-9]+]] = zext i64 %inc1 to i128
// CHECK-64: [[EHS:%[0-9]+]] = shl nuw i128 [[EH]], 64
// CHECK-64: [[EXP:%[0-9]+]] = or disjoint i128 [[EHS]], [[EL]]
-// CHECK-64: [[ORG:%[0-9]+]] = load i128, ptr %incdec.ptr2, align 16
+// CHECK-64: [[ORG:%[0-9]+]] = load i128, ptr %incdec.ptr2, align 8
// CHECK-64: [[RES:%[0-9]+]] = cmpxchg volatile ptr %incdec.ptr, i128 [[ORG]], i128 [[EXP]] seq_cst seq_cst, align 16
// CHECK-64: [[OLD:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 0
-// CHECK-64: store i128 [[OLD]], ptr %incdec.ptr2, align 16
+// CHECK-64: store i128 [[OLD]], ptr %incdec.ptr2, align 8
// CHECK-64: [[SUC1:%[0-9]+]] = extractvalue { i128, i1 } [[RES]], 1
// CHECK-64: [[SUC8:%[0-9]+]] = zext i1 [[SUC1]] to i8
// CHECK-64: ret i8 [[SUC8]]
diff --git a/clang/test/OpenMP/parallel_reduction_codegen.cpp b/clang/test/OpenMP/parallel_reduction_codegen.cpp
index 552d969d0b3e9..88a10a16615d0 100644
--- a/clang/test/OpenMP/parallel_reduction_codegen.cpp
+++ b/clang/test/OpenMP/parallel_reduction_codegen.cpp
@@ -1350,7 +1350,7 @@ int main() {
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.case2:
// CHECK1-NEXT: [[TMP21:%.*]] = load i32, ptr [[T_VAR2]], align 128
-// CHECK1-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP21]] monotonic, align 4
+// CHECK1-NEXT: [[TMP22:%.*]] = atomicrmw add ptr [[TMP1]], i32 [[TMP21]] monotonic, align 128
// CHECK1-NEXT: call void @__kmpc_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var)
// CHECK1-NEXT: [[CALL10:%.*]] = call noundef nonnull align 4 dereferenceable(4) ptr @_ZN1SIiEanERKS0_(ptr noundef nonnull align 4 dereferenceable(4) [[TMP3]], ptr noundef nonnull align 4 dereferenceable(4) [[VAR3]])
// CHECK1-NEXT: call void @llvm.memcpy.p0.p0.i64(ptr align 128 [[TMP3]], ptr align 4 [[CALL10]], i64 4, i1 false)
@@ -1371,7 +1371,7 @@ int main() {
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[REF_TMP11]]) #[[ATTR5]]
// CHECK1-NEXT: call void @__kmpc_end_critical(ptr @[[GLOB2]], i32 [[TMP12]], ptr @.gomp_critical_user_.atomic_reduction.var)
// CHECK1-NEXT: [[TMP24:%.*]] = load i32, ptr [[T_VAR15]], align 128
-// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw min ptr [[TMP5]], i32 [[TMP24]] monotonic, align 4
+// CHECK1-NEXT: [[TMP25:%.*]] = atomicrmw min ptr [[TMP5]], i32 [[TMP24]] monotonic, align 128
// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK1: .omp.reduction.default:
// CHECK1-NEXT: call void @_ZN1SIiED1Ev(ptr noundef nonnull align 4 dereferenceable(4) [[VAR14]]) #[[ATTR5]]
@@ -2368,7 +2368,7 @@ int main() {
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.case2:
// CHECK4-NEXT: [[TMP10:%.*]] = load i32, ptr [[G1]], align 128
-// CHECK4-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP10]] monotonic, align 4
+// CHECK4-NEXT: [[TMP11:%.*]] = atomicrmw add ptr [[TMP0]], i32 [[TMP10]] monotonic, align 128
// CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DEFAULT]]
// CHECK4: .omp.reduction.default:
// CHECK4-NEXT: ret void