r176420 - Improve C11 atomics support:
Jeffrey Yasskin
jyasskin at googlers.com
Sun Mar 3 11:30:17 PST 2013
On Sun, Mar 3, 2013 at 11:19 AM, Jeffrey Yasskin <jyasskin at googlers.com> wrote:
> On Sun, Mar 3, 2013 at 8:02 AM, David Chisnall <csdavec at swan.ac.uk> wrote:
>> Author: theraven
>> Date: Sun Mar 3 10:02:42 2013
>> New Revision: 176420
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=176420&view=rev
>> Log:
>> Improve C11 atomics support:
>>
>> - Generate atomicrmw operations in most of the cases when it's sensible to do
>> so.
>
> It would be nice if the LLVM optimizers could get this instead of
> needing the frontend to do it. 'course, compile time might argue for
> having the frontend do it _also_.
>
>> - Don't crash in several common cases (and hopefully don't crash in more of
>> them).
>> - Add some better tests.
>>
>> We now generate significantly better code for things like:
>> _Atomic(int) x;
>> ...
>> x++;
>>
>> On MIPS, this now generates a 4-instruction ll/sc loop, where previously it
>> generated about 30 instructions in two nested loops. On x86-64, we generate a
>> single lock incl, instead of a lock cmpxchgl loop (one instruction instead of
>> ten).
>>
>>
>> Added:
>> cfe/trunk/test/CodeGen/c11atomics.c
>> Modified:
>> cfe/trunk/ (props changed)
>> cfe/trunk/lib/CodeGen/CGExprScalar.cpp
>> cfe/trunk/test/CodeGen/atomic_ops.c
>>
>> Propchange: cfe/trunk/
>> ------------------------------------------------------------------------------
>> --- svn:mergeinfo (original)
>> +++ svn:mergeinfo Sun Mar 3 10:02:42 2013
>> @@ -1,2 +1,3 @@
>> /cfe/branches/type-system-rewrite:134693-134817
>> +/cfe/trunk/test:170344
>> /cfe/trunk/test/SemaTemplate:126920
>>
>> Modified: cfe/trunk/lib/CodeGen/CGExprScalar.cpp
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGExprScalar.cpp?rev=176420&r1=176419&r2=176420&view=diff
>> ==============================================================================
>> --- cfe/trunk/lib/CodeGen/CGExprScalar.cpp (original)
>> +++ cfe/trunk/lib/CodeGen/CGExprScalar.cpp Sun Mar 3 10:02:42 2013
>> @@ -1444,21 +1444,60 @@ ScalarExprEmitter::EmitScalarPrePostIncD
>> bool isInc, bool isPre) {
>>
>> QualType type = E->getSubExpr()->getType();
>> - llvm::Value *value = EmitLoadOfLValue(LV);
>> - llvm::Value *input = value;
>> llvm::PHINode *atomicPHI = 0;
>> + llvm::Value *value;
>> + llvm::Value *input;
>>
>> int amount = (isInc ? 1 : -1);
>>
>> if (const AtomicType *atomicTy = type->getAs<AtomicType>()) {
>> + type = atomicTy->getValueType();
>> + if (isInc && type->isBooleanType()) {
>> + llvm::Value *True = CGF.EmitToMemory(Builder.getTrue(), type);
>> + if (isPre) {
>> + Builder.Insert(new llvm::StoreInst(True,
>> + LV.getAddress(), LV.isVolatileQualified(),
>> + LV.getAlignment().getQuantity(),
>> + llvm::SequentiallyConsistent));
>> + return Builder.getTrue();
>> + }
>> + // For atomic bool increment, we just store true and return it for
>> + // preincrement, do an atomic swap with true for postincrement
>> + return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
>> + LV.getAddress(), True, llvm::SequentiallyConsistent);
>> + }
>> + // Special case for atomic increment / decrement on integers, emit
>> + // atomicrmw instructions. We skip this if we want to be doing overflow
>> + // checking, and fall into the slow path with the atomic cmpxchg loop.
>> + if (!type->isBooleanType() && type->isIntegerType() &&
>> + !(type->isUnsignedIntegerType() &&
>> + CGF.SanOpts->UnsignedIntegerOverflow) &&
>> + CGF.getLangOpts().getSignedOverflowBehavior() !=
>> + LangOptions::SOB_Trapping) {
>
> I'm not sure this is wrong, but note that "For signed integer types,
> arithmetic is defined to use two’s complement representation with
> silent wrap-around on overflow; there are no undefined results." in
> C11 7.17.7.5. Likely you want to use SanOpts->UnsignedIntegerOverflow
> for both signed and unsigned atomics.
>
> I'm not terribly familiar with the code below here, but it looks like
> the cmpxchg loop is wrong for this too.
Never mind, I think. You're talking about built-in operators, while I
quoted the explicit functions.
>> + llvm::AtomicRMWInst::BinOp aop = isInc ? llvm::AtomicRMWInst::Add :
>> + llvm::AtomicRMWInst::Sub;
>> + llvm::Instruction::BinaryOps op = isInc ? llvm::Instruction::Add :
>> + llvm::Instruction::Sub;
>> + llvm::Value *amt = CGF.EmitToMemory(
>> + llvm::ConstantInt::get(ConvertType(type), 1, true), type);
>> + llvm::Value *old = Builder.CreateAtomicRMW(aop,
>> + LV.getAddress(), amt, llvm::SequentiallyConsistent);
>> + return isPre ? Builder.CreateBinOp(op, old, amt) : old;
>> + }
>> + value = EmitLoadOfLValue(LV);
>> + input = value;
>> + // For every other atomic operation, we need to emit a load-op-cmpxchg loop
>> llvm::BasicBlock *startBB = Builder.GetInsertBlock();
>> llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
>> + value = CGF.EmitToMemory(value, type);
>> Builder.CreateBr(opBB);
>> Builder.SetInsertPoint(opBB);
>> atomicPHI = Builder.CreatePHI(value->getType(), 2);
>> atomicPHI->addIncoming(value, startBB);
>> - type = atomicTy->getValueType();
>> value = atomicPHI;
>> + } else {
>> + value = EmitLoadOfLValue(LV);
>> + input = value;
>> }
>>
>> // Special case of integer increment that we have to check first: bool++.
>> @@ -1596,7 +1635,7 @@ ScalarExprEmitter::EmitScalarPrePostIncD
>> llvm::BasicBlock *opBB = Builder.GetInsertBlock();
>> llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
>> llvm::Value *old = Builder.CreateAtomicCmpXchg(LV.getAddress(), atomicPHI,
>> - value, llvm::SequentiallyConsistent);
>> + CGF.EmitToMemory(value, type), llvm::SequentiallyConsistent);
>> atomicPHI->addIncoming(old, opBB);
>> llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
>> Builder.CreateCondBr(success, contBB, opBB);
>> @@ -1872,20 +1911,63 @@ LValue ScalarExprEmitter::EmitCompoundAs
>> OpInfo.E = E;
>> // Load/convert the LHS.
>> LValue LHSLV = EmitCheckedLValue(E->getLHS(), CodeGenFunction::TCK_Store);
>> - OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>>
>> llvm::PHINode *atomicPHI = 0;
>> - if (LHSTy->isAtomicType()) {
>> + if (const AtomicType *atomicTy = LHSTy->getAs<AtomicType>()) {
>> + QualType type = atomicTy->getValueType();
>> + if (!type->isBooleanType() && type->isIntegerType() &&
>> + !(type->isUnsignedIntegerType() &&
>> + CGF.SanOpts->UnsignedIntegerOverflow) &&
>> + CGF.getLangOpts().getSignedOverflowBehavior() !=
>> + LangOptions::SOB_Trapping) {
>> + llvm::AtomicRMWInst::BinOp aop = llvm::AtomicRMWInst::BAD_BINOP;
>> + switch (OpInfo.Opcode) {
>> + // We don't have atomicrmw operands for *, %, /, <<, >>
>> + case BO_MulAssign: case BO_DivAssign:
>> + case BO_RemAssign:
>> + case BO_ShlAssign:
>> + case BO_ShrAssign:
>> + break;
>> + case BO_AddAssign:
>> + aop = llvm::AtomicRMWInst::Add;
>> + break;
>> + case BO_SubAssign:
>> + aop = llvm::AtomicRMWInst::Sub;
>> + break;
>> + case BO_AndAssign:
>> + aop = llvm::AtomicRMWInst::And;
>> + break;
>> + case BO_XorAssign:
>> + aop = llvm::AtomicRMWInst::Xor;
>> + break;
>> + case BO_OrAssign:
>> + aop = llvm::AtomicRMWInst::Or;
>> + break;
>> + default:
>> + llvm_unreachable("Invalid compound assignment type");
>> + }
>> + if (aop != llvm::AtomicRMWInst::BAD_BINOP) {
>> + llvm::Value *amt = CGF.EmitToMemory(EmitScalarConversion(OpInfo.RHS,
>> + E->getRHS()->getType(), LHSTy), LHSTy);
>> + Builder.CreateAtomicRMW(aop, LHSLV.getAddress(), amt,
>> + llvm::SequentiallyConsistent);
>> + return LHSLV;
>> + }
>> + }
>> // FIXME: For floating point types, we should be saving and restoring the
>> // floating point environment in the loop.
>> llvm::BasicBlock *startBB = Builder.GetInsertBlock();
>> llvm::BasicBlock *opBB = CGF.createBasicBlock("atomic_op", CGF.CurFn);
>> + OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>> + OpInfo.LHS = CGF.EmitToMemory(OpInfo.LHS, type);
>> Builder.CreateBr(opBB);
>> Builder.SetInsertPoint(opBB);
>> atomicPHI = Builder.CreatePHI(OpInfo.LHS->getType(), 2);
>> atomicPHI->addIncoming(OpInfo.LHS, startBB);
>> OpInfo.LHS = atomicPHI;
>> }
>> + else
>> + OpInfo.LHS = EmitLoadOfLValue(LHSLV);
>>
>> OpInfo.LHS = EmitScalarConversion(OpInfo.LHS, LHSTy,
>> E->getComputationLHSType());
>> @@ -1900,7 +1982,7 @@ LValue ScalarExprEmitter::EmitCompoundAs
>> llvm::BasicBlock *opBB = Builder.GetInsertBlock();
>> llvm::BasicBlock *contBB = CGF.createBasicBlock("atomic_cont", CGF.CurFn);
>> llvm::Value *old = Builder.CreateAtomicCmpXchg(LHSLV.getAddress(), atomicPHI,
>> - Result, llvm::SequentiallyConsistent);
>> + CGF.EmitToMemory(Result, LHSTy), llvm::SequentiallyConsistent);
>> atomicPHI->addIncoming(old, opBB);
>> llvm::Value *success = Builder.CreateICmpEQ(old, atomicPHI);
>> Builder.CreateCondBr(success, contBB, opBB);
>>
>> Modified: cfe/trunk/test/CodeGen/atomic_ops.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/atomic_ops.c?rev=176420&r1=176419&r2=176420&view=diff
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/atomic_ops.c (original)
>> +++ cfe/trunk/test/CodeGen/atomic_ops.c Sun Mar 3 10:02:42 2013
>> @@ -15,9 +15,4 @@ void foo(int x)
>> // CHECK: sdiv i32
>> // CHECK: cmpxchg i16*
>>
>> - // These should be emitting atomicrmw instructions, but they aren't yet
>> - i += 2; // CHECK: cmpxchg
>> - i -= 2; // CHECK: cmpxchg
>> - i++; // CHECK: cmpxchg
>> - i--; // CHECK: cmpxchg
>> }
>>
>> Added: cfe/trunk/test/CodeGen/c11atomics.c
>> URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/c11atomics.c?rev=176420&view=auto
>> ==============================================================================
>> --- cfe/trunk/test/CodeGen/c11atomics.c (added)
>> +++ cfe/trunk/test/CodeGen/c11atomics.c Sun Mar 3 10:02:42 2013
>> @@ -0,0 +1,139 @@
>> +// RUN: %clang_cc1 %s -emit-llvm -o - -triple=armv7-unknown-freebsd -std=c11 | FileCheck %s
>> +
>> +// Test that we are generating atomicrmw instructions, rather than
>> +// compare-exchange loops for common atomic ops. This makes a big difference
>> +// on RISC platforms, where the compare-exchange loop becomes a ll/sc pair for
>> +// the load and then another ll/sc in the loop, expanding to about 30
>> +// instructions when it should be only 4.  It has a smaller, but still
>> +// noticeable, impact on platforms like x86, where there are atomic RMW
>> +// instructions.
>> +//
>> +// We currently emit cmpxchg loops for most operations on _Bools, because
>> +// they're sufficiently rare that it's not worth making sure that the semantics
>> +// are correct.
>> +
>> +typedef int __attribute__((vector_size(16))) vector;
>> +
>> +_Atomic(_Bool) b;
>> +_Atomic(int) i;
>> +_Atomic(long long) l;
>> +_Atomic(short) s;
>> +_Atomic(char*) p;
>> +_Atomic(float) f;
>> +_Atomic(vector) v;
>> +
>> +// CHECK-NOT: cmpxchg
>> +
>> +// CHECK: testinc
>> +void testinc(void)
>> +{
>> + // Special case for suffix bool++, sets to true and returns the old value.
>> + // CHECK: atomicrmw xchg i8* @b, i8 1 seq_cst
>> + b++;
>> + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
>> + i++;
>> + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
>> + l++;
>> + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
>> + s++;
>> + // Prefix increment
>> + // Special case for bool: set to true and return true
>> + // CHECK: store atomic i8 1, i8* @b seq_cst, align 1
>> + ++b;
>> + // Currently, we have no variant of atomicrmw that returns the new value, so
>> + // we have to generate an atomic add, which returns the old value, and then a
>> + // non-atomic add.
>> + // CHECK: atomicrmw add i32* @i, i32 1 seq_cst
>> + // CHECK: add i32
>> + ++i;
>> + // CHECK: atomicrmw add i64* @l, i64 1 seq_cst
>> + // CHECK: add i64
>> + ++l;
>> + // CHECK: atomicrmw add i16* @s, i16 1 seq_cst
>> + // CHECK: add i16
>> + ++s;
>> +}
>> +// CHECK: testdec
>> +void testdec(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + b--;
>> + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
>> + i--;
>> + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
>> + l--;
>> + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
>> + s--;
>> + // CHECK: cmpxchg i8* @b
>> + --b;
>> + // CHECK: atomicrmw sub i32* @i, i32 1 seq_cst
>> + // CHECK: sub i32
>> + --i;
>> + // CHECK: atomicrmw sub i64* @l, i64 1 seq_cst
>> + // CHECK: sub i64
>> + --l;
>> + // CHECK: atomicrmw sub i16* @s, i16 1 seq_cst
>> + // CHECK: sub i16
>> + --s;
>> +}
>> +// CHECK: testaddeq
>> +void testaddeq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw add i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw add i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw add i16* @s, i16 42 seq_cst
>> + b += 42;
>> + i += 42;
>> + l += 42;
>> + s += 42;
>> +}
>> +// CHECK: testsubeq
>> +void testsubeq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw sub i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw sub i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw sub i16* @s, i16 42 seq_cst
>> + b -= 42;
>> + i -= 42;
>> + l -= 42;
>> + s -= 42;
>> +}
>> +// CHECK: testxoreq
>> +void testxoreq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw xor i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw xor i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw xor i16* @s, i16 42 seq_cst
>> + b ^= 42;
>> + i ^= 42;
>> + l ^= 42;
>> + s ^= 42;
>> +}
>> +// CHECK: testoreq
>> +void testoreq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw or i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw or i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw or i16* @s, i16 42 seq_cst
>> + b |= 42;
>> + i |= 42;
>> + l |= 42;
>> + s |= 42;
>> +}
>> +// CHECK: testandeq
>> +void testandeq(void)
>> +{
>> + // CHECK: cmpxchg i8* @b
>> + // CHECK: atomicrmw and i32* @i, i32 42 seq_cst
>> + // CHECK: atomicrmw and i64* @l, i64 42 seq_cst
>> + // CHECK: atomicrmw and i16* @s, i16 42 seq_cst
>> + b &= 42;
>> + i &= 42;
>> + l &= 42;
>> + s &= 42;
>> +}
>> +
>>
>>
>> _______________________________________________
>> cfe-commits mailing list
>> cfe-commits at cs.uiuc.edu
>> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
More information about the cfe-commits
mailing list