r176420 - Improve C11 atomics support:
Jeffrey Yasskin
jyasskin at googlers.com
Sun Mar 3 11:19:37 PST 2013
On Sun, Mar 3, 2013 at 8:02 AM, David Chisnall <csdavec at swan.ac.uk> wrote:
> Author: theraven
> Date: Sun Mar 3 10:02:42 2013
> New Revision: 176420
>
> URL: http://llvm.org/viewvc/llvm-project?rev=176420&view=rev
> Log:
> Improve C11 atomics support:
>
> - Generate atomicrmw operations in most of the cases when it's sensible to do
> so.
It would be nice if the LLVM optimizers could perform this transformation
instead of needing the frontend to do it. Of course, compile time might
argue for having the frontend do it _also_.
> - Don't crash in several common cases (and hopefully don't crash in more of
> them).
> - Add some better tests.
>
> We now generate significantly better code for things like:
> _Atomic(int) x;
> ...
> x++;
>
> On MIPS, this now generates a 4-instruction ll/sc loop, where previously it
> generated about 30 instructions in two nested loops. On x86-64, we generate a
> single lock incl, instead of a lock cmpxchgl loop (one instruction instead of
> ten).
>
>
> Added:
> cfe/trunk/test/CodeGen/c11atomics.c
> Modified:
> cfe/trunk/ (props changed)
> cfe/trunk/lib/CodeGen/CGExprScalar.cpp
> cfe/trunk/test/CodeGen/atomic_ops.c
>
> Propchange: cfe/trunk/
> ------------------------------------------------------------------------------
> --- svn:mergeinfo (original)
> +++ svn:mergeinfo Sun Mar 3 10:02:42 2013
> @@ -1,2 +1,3 @@
> /cfe/branches/type-system-rewrite:134693-134817
> +/cfe/trunk/test:170344
> /cfe/trunk/test/SemaTemplate:126920
>
> Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=176420&r1=176419&r2=176420&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Sun Mar 3 10:02:42 2013
> @@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncD
> bool isInc, bool isPre) {
>
> QualType type = E->getSubExpr()->getType();
> - llvm::Value *value = EmitLoadOfLValue(LV);
> - llvm::Value *input = value;
> llvm::PHINode *atomicPHI = 0;
> + llvm::Value *value;
> + llvm::Value *input;
>
> int amount = (isInc ? 1 : -1);
>
> if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
> + type = atomicTy->getValueType();
> + if (isInc && type->isBooleanType()) {
> + llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
> + if (isPre) {
> + Builder.Insert(new llvm::StoreInst(True,
> + LV.getAddress(), LV.isVolatileQualified(),
> + LV.getAlignment().getQuantity(),
> + llvm::SequentiallyConsistent));
> + return Builder.getTrue();
> + }
> + // For atomic bool increment, we just store true and return it for
> + // preincrement, do an atomic swap with true for postincrement
> + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
> + LV.getAddress(), True, llvm::SequentiallyConsistent);
> + }
> + // Special case for atomic increment / decrement on integers, emit
> + // atomicrmw instructions. We skip this if we want to be doing overflow
> + // checking, and fall into the slow path with the atomic cmpxchg loop.
> + if (!type->isBooleanType() && type->isIntegerType() &&
> + !(type->isUnsignedIntegerType() &&
> + CGF.SanOpts->UnsignedIntegerOverflow) &&
> + CGF.getLangOpts().getSignedOverflowBehavior() !=
> + LangOptions::SOB_Trapping) {
I'm not sure this is wrong, but note that "For signed integer types,
arithmetic is defined to use two’s complement representation with
silent wrap-around on overflow; there are no undefined results." in
C11 7.17.7.5. Since signed atomic arithmetic is defined to wrap, signed
overflow here is not undefined behavior, so you likely want to use
SanOpts->UnsignedIntegerOverflow (wrap-checking semantics) for both
signed and unsigned atomics, rather than the signed-overflow behavior.
I'm not terribly familiar with the code below here, but it looks like
the cmpxchg loop is wrong for this too.
> + llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
> + llvm::AtomicRMWInst::Sub;
> + llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
> + llvm::Instruction::Sub;
> + llvm::Value *amt = CGF.EmitToMemory(
> + llvm::ConstantInt::get(ConvertType(type), 1, true), type);
> + llvm::Value *old = Builder.CreateAtomicRMW(aop,
> + LV.getAddress(), amt, llvm::SequentiallyConsistent);
> + return isPre ? Builder.CreateBinOp(op, old, amt) : old;
> + }
> + value = EmitLoadOfLValue(LV);
> + input = value;
> + // For every other atomic operation, we need to emit a load-op-cmpxchg loop
> llvm::BasicBlock *startBB = Builder.GetInsertBlock();
> llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
> + value = CGF.EmitToMemory(value, type);
> Builder.CreateBr(opBB);
> Builder.SetInsertPoint(opBB);
> atomicPHI = Builder.CreatePHI(value->getType(), 2);
> atomicPHI->addIncoming(value, startBB);
> - type = atomicTy->getValueType();
> value = atomicPHI;
> + } else {
> + value = EmitLoadOfLValue(LV);
> + input = value;
> }
>
> // Special case of integer increment that we have to check first: bool++.
> @@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncD
> llvm::BasicBlock *opBB = Builder.GetInsertBlock();
> llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
> llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
> - value, llvm::SequentiallyConsistent);
> + CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
> atomicPHI->addIncoming(old, opBB);
> llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
> Builder.CreateCondBr(success, contBB, opBB);
> @@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAs
> OpInfo.E = E;
> // Load/convert the LHS.
> LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
> - OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>
> llvm::PHINode *atomicPHI = 0;
> - if (LHSTy->isAtomicType()) {
> + if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
> + QualType type = atomicTy->getValueType();
> + if (!type->isBooleanType() && type->isIntegerType() &&
> + !(type->isUnsignedIntegerType() &&
> + CGF.SanOpts->UnsignedIntegerOverflow) &&
> + CGF.getLangOpts().getSignedOverflowBehavior() !=
> + LangOptions::SOB_Trapping) {
> + llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
> + switch (OpInfo.Opcode) {
> + // We don't have atomicrmw operands for *, %, /, <<, >>
> + case BO_MulAssign: case BO_DivAssign:
> + case BO_RemAssign:
> + case BO_ShlAssign:
> + case BO_ShrAssign:
> + break;
> + case BO_AddAssign:
> + aop = llvm::AtomicRMWInst::Add;
> + break;
> + case BO_SubAssign:
> + aop = llvm::AtomicRMWInst::Sub;
> + break;
> + case BO_AndAssign:
> + aop = llvm::AtomicRMWInst::And;
> + break;
> + case BO_XorAssign:
> + aop = llvm::AtomicRMWInst::Xor;
> + break;
> + case BO_OrAssign:
> + aop = llvm::AtomicRMWInst::Or;
> + break;
> + default:
> + llvm_unreachable("Invalid compound assignment type");
> + }
> + if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
> + llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
> + E->getRHS()->getType(), LHSTy), LHSTy);
> + Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
> + llvm::SequentiallyConsistent);
> + return LHSLV;
> + }
> + }
> // FIXME: For floating point types, we should be saving and restoring the
> // floating point environment in the loop.
> llvm::BasicBlock *startBB = Builder.GetInsertBlock();
> llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
> + OpInfo.LHS = EmitLoadOfLValue(LHSLV);
> + OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
> Builder.CreateBr(opBB);
> Builder.SetInsertPoint(opBB);
> atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
> atomicPHI->addIncoming(OpInfo.LHS, startBB);
> OpInfo.LHS = atomicPHI;
> }
> + else
> + OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>
> OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
> E->getComputationLHSType());
> @@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAs
> llvm::BasicBlock *opBB = Builder.GetInsertBlock();
> llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
> llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
> - Result, llvm::SequentiallyConsistent);
> + CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
> atomicPHI->addIncoming(old, opBB);
> llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
> Builder.CreateCondBr(success, contBB, opBB);
>
> Modified: cfe/trunk/test/CodeGen/atomic_ops.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/atomic_ops.c?rev=176420&r1=176419&r2=176420&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/atomic_ops.c (original)
> +++ cfe/trunk/test/CodeGen/atomic_ops.c Sun Mar 3 10:02:42 2013
> @@ -15,9 +15,4 @@ void foo(int x)
> // CHECK: sdiv i32
> // CHECK: cmpxchg i16*
>
> - // These should be emitting atomicrmw instructions, but they aren't yet
> - i += 2; // CHECK: cmpxchg
> - i -= 2; // CHECK: cmpxchg
> - i++; // CHECK: cmpxchg
> - i--; // CHECK: cmpxchg
> }
>
> Added: cfe/trunk/test/CodeGen/c11atomics.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/c11atomics.c?rev=176420&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGen/c11atomics.c (added)
> +++ cfe/trunk/test/CodeGen/c11atomics.c Sun Mar 3 10:02:42 2013
> @@ -0,0 +1,139 @@
> +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
> +
> +// Test that we are generating atomicrmw instructions, rather than
> +// compare-exchange loops for common atomic ops. This makes a big difference
> +// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
> +// the load and then another ll/sc in the loop, expanding to about 30
> +// instructions when it should be only 4. It has a smaller, but still
> +// noticeable, impact on platforms like x86, where there are atomic
> +// RMW instructions.
> +//
> +// We currently emit cmpxchg loops for most operations on _Bools, because
> +// they're sufficiently rare that it's not worth making sure that the semantics
> +// are correct.
> +
> +typedef int __attribute__((vector_size(16))) vector;
> +
> +_Atomic(_Bool) b;
> +_Atomic(int) i;
> +_Atomic(long long) l;
> +_Atomic(short) s;
> +_Atomic(char*) p;
> +_Atomic(float) f;
> +_Atomic(vector) v;
> +
> +// CHECK-NOT: cmpxchg
> +
> +// CHECK: testinc
> +void testinc(void)
> +{
> + // Special case for suffix bool++, sets to true and returns the old value.
> + // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
> + b++;
> + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
> + i++;
> + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
> + l++;
> + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
> + s++;
> + // Prefix increment
> + // Special case for bool: set to true and return true
> + // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
> + ++b;
> + // Currently, we have no variant of atomicrmw that returns the new value, so
> + // we have to generate an atomic add, which returns the old value, and then a
> + // non-atomic add.
> + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
> + // CHECK: add i32
> + ++i;
> + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
> + // CHECK: add i64
> + ++l;
> + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
> + // CHECK: add i16
> + ++s;
> +}
> +// CHECK: testdec
> +void testdec(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + b--;
> + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
> + i--;
> + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
> + l--;
> + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
> + s--;
> + // CHECK: cmpxchg i8* @b
> + --b;
> + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
> + // CHECK: sub i32
> + --i;
> + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
> + // CHECK: sub i64
> + --l;
> + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
> + // CHECK: sub i16
> + --s;
> +}
> +// CHECK: testaddeq
> +void testaddeq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
> + b += 42;
> + i += 42;
> + l += 42;
> + s += 42;
> +}
> +// CHECK: testsubeq
> +void testsubeq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
> + b -= 42;
> + i -= 42;
> + l -= 42;
> + s -= 42;
> +}
> +// CHECK: testxoreq
> +void testxoreq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
> + b ^= 42;
> + i ^= 42;
> + l ^= 42;
> + s ^= 42;
> +}
> +// CHECK: testoreq
> +void testoreq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
> + b |= 42;
> + i |= 42;
> + l |= 42;
> + s |= 42;
> +}
> +// CHECK: testandeq
> +void testandeq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
> + b &= 42;
> + i &= 42;
> + l &= 42;
> + s &= 42;
> +}
> +
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list