r176420 - Improve C11 atomics support:
Jeffrey Yasskin
jyasskin at googlers.com
Sun Mar 3 11:19:37 PST 2013
On Sun, Mar 3, 2013 at 8:02 AM, David Chisnall <csdavec at swan.ac.uk> wrote:
> Author: theraven
> Date: Sun Mar 3 10:02:42 2013
> New Revision: 176420
>
> URL: http://llvm.org/viewvc/llvm-project?rev=176420&view=rev
> Log:
> Improve C11 atomics support:
>
> - Generate atomicrmw operations in most of the cases when it's sensible to do
> so.
It would be nice if the LLVM optimizers could perform this transformation
instead of needing the frontend to do it. Of course, compile time might
argue for having the frontend do it _also_.
> - Don't crash in several common cases (and hopefully don't crash in more of
> them).
> - Add some better tests.
>
> We now generate significantly better code for things like:
> _Atomic(int) x;
> ...
> x++;
>
> On MIPS, this now generates a 4-instruction ll/sc loop, where previously it
> generated about 30 instructions in two nested loops. On x86-64, we generate a
> single lock incl, instead of a lock cmpxchgl loop (one instruction instead of
> ten).
>
>
> Added:
> cfe/trunk/test/CodeGen/c11atomics.c
> Modified:
> cfe/trunk/ (props changed)
> cfe/trunk/lib/CodeGen/CGExprScalar.cpp
> cfe/trunk/test/CodeGen/atomic_ops.c
>
> Propchange: cfe/trunk/
> ------------------------------------------------------------------------------
> --- svn:mergeinfo (original)
> +++ svn:mergeinfo Sun Mar 3 10:02:42 2013
> @@ -1,2 +1,3 @@
> /cfe/branches/type-system-rewrite:134693-134817
> +/cfe/trunk/test:170344
> /cfe/trunk/test/SemaTemplate:126920
>
> Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=176420&r1=176419&r2=176420&view=diff
> ==============================================================================
> --- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
> +++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Sun Mar 3 10:02:42 2013
> @@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncD
> bool isInc, bool isPre) {
>
> QualType type = E->getSubExpr()->getType();
> - llvm::Value *value = EmitLoadOfLValue(LV);
> - llvm::Value *input = value;
> llvm::PHINode *atomicPHI = 0;
> + llvm::Value *value;
> + llvm::Value *input;
>
> int amount = (isInc ? 1 : -1);
>
> if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
> + type = atomicTy->getValueType();
> + if (isInc && type->isBooleanType()) {
> + llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
> + if (isPre) {
> + Builder.Insert(new llvm::StoreInst(True,
> + LV.getAddress(), LV.isVolatileQualified(),
> + LV.getAlignment().getQuantity(),
> + llvm::SequentiallyConsistent));
> + return Builder.getTrue();
> + }
> + // For atomic bool increment, we just store true and return it for
> + // preincrement, do an atomic swap with true for postincrement
> + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
> + LV.getAddress(), True, llvm::SequentiallyConsistent);
> + }
> + // Special case for atomic increment / decrement on integers, emit
> + // atomicrmw instructions. We skip this if we want to be doing overflow
> + // checking, and fall into the slow path with the atomic cmpxchg loop.
> + if (!type->isBooleanType() && type->isIntegerType() &&
> + !(type->isUnsignedIntegerType() &&
> + CGF.SanOpts->UnsignedIntegerOverflow) &&
> + CGF.getLangOpts().getSignedOverflowBehavior() !=
> + LangOptions::SOB_Trapping) {
I'm not sure this is wrong, but note that "For signed integer types,
arithmetic is defined to use two’s complement representation with
silent wrap-around on overflow; there are no undefined results." in
C11 7.17.7.5. Since signed atomic arithmetic is defined to wrap, signed
overflow here is not undefined behavior, so you likely want to use
SanOpts->UnsignedIntegerOverflow (wrap-checking semantics) for both
signed and unsigned atomics, rather than the signed-overflow behavior.
I'm not terribly familiar with the code below here, but it looks like
the cmpxchg loop is wrong for this too.
> + llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
> + llvm::AtomicRMWInst::Sub;
> + llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
> + llvm::Instruction::Sub;
> + llvm::Value *amt = CGF.EmitToMemory(
> + llvm::ConstantInt::get(ConvertType(type), 1, true), type);
> + llvm::Value *old = Builder.CreateAtomicRMW(aop,
> + LV.getAddress(), amt, llvm::SequentiallyConsistent);
> + return isPre ? Builder.CreateBinOp(op, old, amt) : old;
> + }
> + value = EmitLoadOfLValue(LV);
> + input = value;
> + // For every other atomic operation, we need to emit a load-op-cmpxchg loop
> llvm::BasicBlock *startBB = Builder.GetInsertBlock();
> llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
> + value = CGF.EmitToMemory(value, type);
> Builder.CreateBr(opBB);
> Builder.SetInsertPoint(opBB);
> atomicPHI = Builder.CreatePHI(value->getType(), 2);
> atomicPHI->addIncoming(value, startBB);
> - type = atomicTy->getValueType();
> value = atomicPHI;
> + } else {
> + value = EmitLoadOfLValue(LV);
> + input = value;
> }
>
> // Special case of integer increment that we have to check first: bool++.
> @@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncD
> llvm::BasicBlock *opBB = Builder.GetInsertBlock();
> llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
> llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
> - value, llvm::SequentiallyConsistent);
> + CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
> atomicPHI->addIncoming(old, opBB);
> llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
> Builder.CreateCondBr(success, contBB, opBB);
> @@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAs
> OpInfo.E = E;
> // Load/convert the LHS.
> LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
> - OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>
> llvm::PHINode *atomicPHI = 0;
> - if (LHSTy->isAtomicType()) {
> + if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
> + QualType type = atomicTy->getValueType();
> + if (!type->isBooleanType() && type->isIntegerType() &&
> + !(type->isUnsignedIntegerType() &&
> + CGF.SanOpts->UnsignedIntegerOverflow) &&
> + CGF.getLangOpts().getSignedOverflowBehavior() !=
> + LangOptions::SOB_Trapping) {
> + llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
> + switch (OpInfo.Opcode) {
> + // We don't have atomicrmw operands for *, %, /, <<, >>
> + case BO_MulAssign: case BO_DivAssign:
> + case BO_RemAssign:
> + case BO_ShlAssign:
> + case BO_ShrAssign:
> + break;
> + case BO_AddAssign:
> + aop = llvm::AtomicRMWInst::Add;
> + break;
> + case BO_SubAssign:
> + aop = llvm::AtomicRMWInst::Sub;
> + break;
> + case BO_AndAssign:
> + aop = llvm::AtomicRMWInst::And;
> + break;
> + case BO_XorAssign:
> + aop = llvm::AtomicRMWInst::Xor;
> + break;
> + case BO_OrAssign:
> + aop = llvm::AtomicRMWInst::Or;
> + break;
> + default:
> + llvm_unreachable("Invalid compound assignment type");
> + }
> + if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
> + llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
> + E->getRHS()->getType(), LHSTy), LHSTy);
> + Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
> + llvm::SequentiallyConsistent);
> + return LHSLV;
> + }
> + }
> // FIXME: For floating point types, we should be saving and restoring the
> // floating point environment in the loop.
> llvm::BasicBlock *startBB = Builder.GetInsertBlock();
> llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
> + OpInfo.LHS = EmitLoadOfLValue(LHSLV);
> + OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
> Builder.CreateBr(opBB);
> Builder.SetInsertPoint(opBB);
> atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
> atomicPHI->addIncoming(OpInfo.LHS, startBB);
> OpInfo.LHS = atomicPHI;
> }
> + else
> + OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>
> OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
> E->getComputationLHSType());
> @@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAs
> llvm::BasicBlock *opBB = Builder.GetInsertBlock();
> llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
> llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
> - Result, llvm::SequentiallyConsistent);
> + CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
> atomicPHI->addIncoming(old, opBB);
> llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
> Builder.CreateCondBr(success, contBB, opBB);
>
> Modified: cfe/trunk/test/CodeGen/atomic_ops.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/atomic_ops.c?rev=176420&r1=176419&r2=176420&view=diff
> ==============================================================================
> --- cfe/trunk/test/CodeGen/atomic_ops.c (original)
> +++ cfe/trunk/test/CodeGen/atomic_ops.c Sun Mar 3 10:02:42 2013
> @@ -15,9 +15,4 @@ void foo(int x)
> // CHECK: sdiv i32
> // CHECK: cmpxchg i16*
>
> - // These should be emitting atomicrmw instructions, but they aren't yet
> - i += 2; // CHECK: cmpxchg
> - i -= 2; // CHECK: cmpxchg
> - i++; // CHECK: cmpxchg
> - i--; // CHECK: cmpxchg
> }
>
> Added: cfe/trunk/test/CodeGen/c11atomics.c
> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/c11atomics.c?rev=176420&view=auto
> ==============================================================================
> --- cfe/trunk/test/CodeGen/c11atomics.c (added)
> +++ cfe/trunk/test/CodeGen/c11atomics.c Sun Mar 3 10:02:42 2013
> @@ -0,0 +1,139 @@
> +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
> +
> +// Test that we are generating atomicrmw instructions, rather than
> +// compare-exchange loops for common atomic ops. This makes a big difference
> +// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
> +// the load and then another ll/sc in the loop, expanding to about 30
> +// instructions when it should be only 4. It has a smaller, but still
> +// noticeable, impact on platforms like x86, where there are atomic
> +// RMW instructions.
> +//
> +// We currently emit cmpxchg loops for most operations on _Bools, because
> +// they're sufficiently rare that it's not worth making sure that the semantics
> +// are correct.
> +
> +typedef int __attribute__((vector_size(16))) vector;
> +
> +_Atomic(_Bool) b;
> +_Atomic(int) i;
> +_Atomic(long long) l;
> +_Atomic(short) s;
> +_Atomic(char*) p;
> +_Atomic(float) f;
> +_Atomic(vector) v;
> +
> +// CHECK-NOT: cmpxchg
> +
> +// CHECK: testinc
> +void testinc(void)
> +{
> + // Special case for suffix bool++, sets to true and returns the old value.
> + // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
> + b++;
> + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
> + i++;
> + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
> + l++;
> + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
> + s++;
> + // Prefix increment
> + // Special case for bool: set to true and return true
> + // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
> + ++b;
> + // Currently, we have no variant of atomicrmw that returns the new value, so
> + // we have to generate an atomic add, which returns the old value, and then a
> + // non-atomic add.
> + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
> + // CHECK: add i32
> + ++i;
> + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
> + // CHECK: add i64
> + ++l;
> + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
> + // CHECK: add i16
> + ++s;
> +}
> +// CHECK: testdec
> +void testdec(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + b--;
> + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
> + i--;
> + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
> + l--;
> + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
> + s--;
> + // CHECK: cmpxchg i8* @b
> + --b;
> + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
> + // CHECK: sub i32
> + --i;
> + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
> + // CHECK: sub i64
> + --l;
> + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
> + // CHECK: sub i16
> + --s;
> +}
> +// CHECK: testaddeq
> +void testaddeq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
> + b += 42;
> + i += 42;
> + l += 42;
> + s += 42;
> +}
> +// CHECK: testsubeq
> +void testsubeq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
> + b -= 42;
> + i -= 42;
> + l -= 42;
> + s -= 42;
> +}
> +// CHECK: testxoreq
> +void testxoreq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
> + b ^= 42;
> + i ^= 42;
> + l ^= 42;
> + s ^= 42;
> +}
> +// CHECK: testoreq
> +void testoreq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
> + b |= 42;
> + i |= 42;
> + l |= 42;
> + s |= 42;
> +}
> +// CHECK: testandeq
> +void testandeq(void)
> +{
> + // CHECK: cmpxchg i8* @b
> + // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
> + // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
> + // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
> + b &= 42;
> + i &= 42;
> + l &= 42;
> + s &= 42;
> +}
> +
>
>
> _______________________________________________
> cfe-commits mailing list
> cfe-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list