r176420 - Improve C11 atomics support:
Jeffrey Yasskin
jyasskin at googlers.com
Sun Mar 3 11:30:17 PST 2013
On Sun, Mar 3, 2013 at 11:19 AM, Jeffrey Yasskin <jyasskin at googlers.com> wrote:
> On Sun, Mar 3, 2013 at 8:02 AM, David Chisnall <csdavec at swan.ac.uk> wrote:
>> Author: theraven
>> Date: Sun Mar 3 10:02:42 2013
>> New Revision: 176420
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=176420&view=rev
>> Log:
>> Improve C11 atomics support:
>>
>> - Generate atomicrmw operations in most of the cases when it's sensible to do
>> so.
>
> It would be nice if the LLVM optimizers could get this instead of
> needing the frontend to do it. 'course, compile time might argue for
> having the frontend do it _also_.
>
>> - Don't crash in several common cases (and hopefully don't crash in more of
>> them).
>> - Add some better tests.
>>
>> We now generate significantly better code for things like:
>> _Atomic(int) x;
>> ...
>> x++;
>>
>> On MIPS, this now generates a 4-instruction ll/sc loop, where previously it
>> generated about 30 instructions in two nested loops. On x86-64, we generate a
>> single lock incl, instead of a lock cmpxchgl loop (one instruction instead of
>> ten).
>>
>>
>> Added:
>> cfe/trunk/test/CodeGen/c11atomics.c
>> Modified:
>> cfe/trunk/ (props changed)
>> cfe/trunk/lib/CodeGen/CGExprScalar.cpp
>> cfe/trunk/test/CodeGen/atomic_ops.c
>>
>> Propchange: cfe/trunk/
>> ------------------------------------------------------------------------------
>> --- svn:mergeinfo (original)
>> +++ svn:mergeinfo Sun Mar 3 10:02:42 2013
>> @@ -1,2 +1,3 @@
>> /cfe/branches/type-system-rewrite:134693-134817
>> +/cfe/trunk/test:170344
>> /cfe/trunk/test/SemaTemplate:126920
>>
>> Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=176420&r1=176419&r2=176420&view=diff
>> ==============================================================================
>> --- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
>> +++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Sun Mar 3 10:02:42 2013
>> @@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncD
>> bool isInc, bool isPre) {
>>
>> QualType type = E->getSubExpr()->getType();
>> - llvm::Value *value = EmitLoadOfLValue(LV);
>> - llvm::Value *input = value;
>> llvm::PHINode *atomicPHI = 0;
>> + llvm::Value *value;
>> + llvm::Value *input;
>>
>> int amount = (isInc ? 1 : -1);
>>
>> if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
>> + type = atomicTy->getValueType();
>> + if (isInc && type->isBooleanType()) {
>> + llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
>> + if (isPre) {
>> + Builder.Insert(new llvm::StoreInst(True,
>> + LV.getAddress(), LV.isVolatileQualified(),
>> + LV.getAlignment().getQuantity(),
>> + llvm::SequentiallyConsistent));
>> + return Builder.getTrue();
>> + }
>> + // For atomic bool increment, we just store true and return it for
>> + // preincrement, do an atomic swap with true for postincrement
>> + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
>> + LV.getAddress(), True, llvm::SequentiallyConsistent);
>> + }
>> + // Special case for atomic increment / decrement on integers, emit
>> + // atomicrmw instructions. We skip this if we want to be doing overflow
>> + // checking, and fall into the slow path with the atomic cmpxchg loop.
>> + if (!type->isBooleanType() && type->isIntegerType() &&
>> + !(type->isUnsignedIntegerType() &&
>> + CGF.SanOpts->UnsignedIntegerOverflow) &&
>> + CGF.getLangOpts().getSignedOverflowBehavior() !=
>> + LangOptions::SOB_Trapping) {
>
> I'm not sure this is wrong, but note that "For signed integer types,
> arithmetic is defined to use two’s complement representation with
> silent wrap-around on overflow; there are no undefined results." in
> C11 7.17.7.5. Likely you want to use SanOpts->UnsignedIntegerOverflow
> for both signed and unsigned atomics.
>
> I'm not terribly familiar with the code below here, but it looks like
> the cmpxchg loop is wrong for this too.
Never mind, I think. You're talking about built-in operators, while I
quoted the explicit functions.
>> + llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
>> + llvm::AtomicRMWInst::Sub;
>> + llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
>> + llvm::Instruction::Sub;
>> + llvm::Value *amt = CGF.EmitToMemory(
>> + llvm::ConstantInt::get(ConvertType(type), 1, true), type);
>> + llvm::Value *old = Builder.CreateAtomicRMW(aop,
>> + LV.getAddress(), amt, llvm::SequentiallyConsistent);
>> + return isPre ? Builder.CreateBinOp(op, old, amt) : old;
>> + }
>> + value = EmitLoadOfLValue(LV);
>> + input = value;
>> + // For every other atomic operation, we need to emit a load-op-cmpxchg loop
>> llvm::BasicBlock *startBB = Builder.GetInsertBlock();
>> llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
>> + value = CGF.EmitToMemory(value, type);
>> Builder.CreateBr(opBB);
>> Builder.SetInsertPoint(opBB);
>> atomicPHI = Builder.CreatePHI(value->getType(), 2);
>> atomicPHI->addIncoming(value, startBB);
>> - type = atomicTy->getValueType();
>> value = atomicPHI;
>> + } else {
>> + value = EmitLoadOfLValue(LV);
>> + input = value;
>> }
>>
>> // Special case of integer increment that we have to check first: bool++.
>> @@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncD
>> llvm::BasicBlock *opBB = Builder.GetInsertBlock();
>> llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
>> llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
>> - value, llvm::SequentiallyConsistent);
>> + CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
>> atomicPHI->addIncoming(old, opBB);
>> llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
>> Builder.CreateCondBr(success, contBB, opBB);
>> @@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAs
>> OpInfo.E = E;
>> // Load/convert the LHS.
>> LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
>> - OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>>
>> llvm::PHINode *atomicPHI = 0;
>> - if (LHSTy->isAtomicType()) {
>> + if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
>> + QualType type = atomicTy->getValueType();
>> + if (!type->isBooleanType() && type->isIntegerType() &&
>> + !(type->isUnsignedIntegerType() &&
>> + CGF.SanOpts->UnsignedIntegerOverflow) &&
>> + CGF.getLangOpts().getSignedOverflowBehavior() !=
>> + LangOptions::SOB_Trapping) {
>> + llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
>> + switch (OpInfo.Opcode) {
>> + // We don't have atomicrmw operands for *, %, /, <<, >>
>> + case BO_MulAssign: case BO_DivAssign:
>> + case BO_RemAssign:
>> + case BO_ShlAssign:
>> + case BO_ShrAssign:
>> + break;
>> + case BO_AddAssign:
>> + aop = llvm::AtomicRMWInst::Add;
>> + break;
>> + case BO_SubAssign:
>> + aop = llvm::AtomicRMWInst::Sub;
>> + break;
>> + case BO_AndAssign:
>> + aop = llvm::AtomicRMWInst::And;
>> + break;
>> + case BO_XorAssign:
>> + aop = llvm::AtomicRMWInst::Xor;
>> + break;
>> + case BO_OrAssign:
>> + aop = llvm::AtomicRMWInst::Or;
>> + break;
>> + default:
>> + llvm_unreachable("Invalid compound assignment type");
>> + }
>> + if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
>> + llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
>> + E->getRHS()->getType(), LHSTy), LHSTy);
>> + Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
>> + llvm::SequentiallyConsistent);
>> + return LHSLV;
>> + }
>> + }
>> // FIXME: For floating point types, we should be saving and restoring the
>> // floating point environment in the loop.
>> llvm::BasicBlock *startBB = Builder.GetInsertBlock();
>> llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
>> + OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>> + OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
>> Builder.CreateBr(opBB);
>> Builder.SetInsertPoint(opBB);
>> atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
>> atomicPHI->addIncoming(OpInfo.LHS, startBB);
>> OpInfo.LHS = atomicPHI;
>> }
>> + else
>> + OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>>
>> OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
>> E->getComputationLHSType());
>> @@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAs
>> llvm::BasicBlock *opBB = Builder.GetInsertBlock();
>> llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
>> llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
>> - Result, llvm::SequentiallyConsistent);
>> + CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
>> atomicPHI->addIncoming(old, opBB);
>> llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
>> Builder.CreateCondBr(success, contBB, opBB);
>>
>> Modified: cfe/trunk/test/CodeGen/atomic_ops.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/atomic_ops.c?rev=176420&r1=176419&r2=176420&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/atomic_ops.c (original)
>> +++ cfe/trunk/test/CodeGen/atomic_ops.c Sun Mar 3 10:02:42 2013
>> @@ -15,9 +15,4 @@ void foo(int x)
>> // CHECK: sdiv i32
>> // CHECK: cmpxchg i16*
>>
>> - // These should be emitting atomicrmw instructions, but they aren't yet
>> - i += 2; // CHECK: cmpxchg
>> - i -= 2; // CHECK: cmpxchg
>> - i++; // CHECK: cmpxchg
>> - i--; // CHECK: cmpxchg
>> }
>>
>> Added: cfe/trunk/test/CodeGen/c11atomics.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/c11atomics.c?rev=176420&view=auto
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/c11atomics.c (added)
>> +++ cfe/trunk/test/CodeGen/c11atomics.c Sun Mar 3 10:02:42 2013
>> @@ -0,0 +1,139 @@
>> +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
>> +
>> +// Test that we are generating atomicrmw instructions, rather than
>> +// compare-exchange loops for common atomic ops. This makes a big difference
>> +// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
>> +// the load and then another ll/sc in the loop, expanding to about 30
>> +// instructions when it should be only 4.  It has a smaller, but still
>> +// noticeable, impact on platforms like x86, where there are atomic RMW
>> +// instructions.
>> +//
>> +// We currently emit cmpxchg loops for most operations on _Bools, because
>> +// they're sufficiently rare that it's not worth making sure that the semantics
>> +// are correct.
>> +
>> +typedef int __attribute__((vector_size(16))) vector;
>> +
>> +_Atomic(_Bool) b;
>> +_Atomic(int) i;
>> +_Atomic(long long) l;
>> +_Atomic(short) s;
>> +_Atomic(char*) p;
>> +_Atomic(float) f;
>> +_Atomic(vector) v;
>> +
>> +// CHECK-NOT: cmpxchg
>> +
>> +// CHECK: testinc
>> +void testinc(void)
>> +{
>> + // Special case for suffix bool++, sets to true and returns the old value.
>> + // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
>> + b++;
>> + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
>> + i++;
>> + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
>> + l++;
>> + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
>> + s++;
>> + // Prefix increment
>> + // Special case for bool: set to true and return true
>> + // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
>> + ++b;
>> + // Currently, we have no variant of atomicrmw that returns the new value, so
>> + // we have to generate an atomic add, which returns the old value, and then a
>> + // non-atomic add.
>> + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
>> + // CHECK: add i32
>> + ++i;
>> + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
>> + // CHECK: add i64
>> + ++l;
>> + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
>> + // CHECK: add i16
>> + ++s;
>> +}
>> +// CHECK: testdec
>> +void testdec(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + b--;
>> + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
>> + i--;
>> + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
>> + l--;
>> + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
>> + s--;
>> + // CHECK: cmpxchg i8* @b
>> + --b;
>> + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
>> + // CHECK: sub i32
>> + --i;
>> + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
>> + // CHECK: sub i64
>> + --l;
>> + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
>> + // CHECK: sub i16
>> + --s;
>> +}
>> +// CHECK: testaddeq
>> +void testaddeq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
>> + b += 42;
>> + i += 42;
>> + l += 42;
>> + s += 42;
>> +}
>> +// CHECK: testsubeq
>> +void testsubeq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
>> + b -= 42;
>> + i -= 42;
>> + l -= 42;
>> + s -= 42;
>> +}
>> +// CHECK: testxoreq
>> +void testxoreq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
>> + b ^= 42;
>> + i ^= 42;
>> + l ^= 42;
>> + s ^= 42;
>> +}
>> +// CHECK: testoreq
>> +void testoreq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
>> + b |= 42;
>> + i |= 42;
>> + l |= 42;
>> + s |= 42;
>> +}
>> +// CHECK: testandeq
>> +void testandeq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
>> + b &= 42;
>> + i &= 42;
>> + l &= 42;
>> + s &= 42;
>> +}
>> +
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list