r334059 - Implement bittest intrinsics generically for non-x86 platforms

Reid Kleckner via cfe-commits cfe-commits at lists.llvm.org
Tue Jun 5 18:35:08 PDT 2018


Author: rnk
Date: Tue Jun  5 18:35:08 2018
New Revision: 334059

URL: http://llvm.org/viewvc/llvm-project?rev=334059&view=rev
Log:
Implement bittest intrinsics generically for non-x86 platforms

I tested these locally on an x86 machine by disabling the inline asm
codepath and confirming that the generic code performs the same bit
flips as the inline asm does.

Addresses code review feedback.
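
To make the new generic lowering easy to follow, here is a minimal C
sketch of what the non-x86 path computes for _bittestandset (the
function name "generic_bittestandset" is illustrative, not part of the
patch; the real code emits the equivalent IR directly):

    /* Hypothetical C model of the generic _bittestandset lowering:
     * index the byte array with all but the bottom three bits of the
     * position, and build a mask from the bottom three bits. */
    unsigned char generic_bittestandset(long *base, long pos) {
      unsigned char *bytes = (unsigned char *)base;
      unsigned char mask = (unsigned char)(1 << (pos & 0x7));
      unsigned char old = bytes[pos >> 3]; /* plain, non-atomic load */
      bytes[pos >> 3] = (unsigned char)(old | mask); /* set the bit */
      return (old >> (pos & 0x7)) & 1; /* previous value of the bit */
    }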

Modified:
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/test/CodeGen/bittest-intrin.c

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334059&r1=334058&r2=334059&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Tue Jun  5 18:35:08 2018
@@ -484,11 +484,48 @@ CodeGenFunction::emitBuiltinObjectSize(c
   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
 }
 
-static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
-                                   char TestAnd, char Size,
-                                   bool Locked = false) {
-  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
-  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+// Get properties of an X86 BT* assembly instruction. The first returned value
+// is the action character: '\0' for plain test, 'c' for complement, 'r' for
+// reset, or 's' for set. The second is the size suffix the assembler needs
+// ('l' or 'q'). The last is whether to add the lock prefix.
+static std::tuple<char, char, bool>
+getBitTestActionSizeAndLocking(unsigned BuiltinID) {
+  switch (BuiltinID) {
+  case Builtin::BI_bittest:
+    return {'\0', 'l', false};
+  case Builtin::BI_bittestandcomplement:
+    return {'c', 'l', false};
+  case Builtin::BI_bittestandreset:
+    return {'r', 'l', false};
+  case Builtin::BI_bittestandset:
+    return {'s', 'l', false};
+  case Builtin::BI_interlockedbittestandreset:
+    return {'r', 'l', /*Locked=*/true};
+  case Builtin::BI_interlockedbittestandset:
+    return {'s', 'l', /*Locked=*/true};
+
+  case Builtin::BI_bittest64:
+    return {'\0', 'q', false};
+  case Builtin::BI_bittestandcomplement64:
+    return {'c', 'q', false};
+  case Builtin::BI_bittestandreset64:
+    return {'r', 'q', false};
+  case Builtin::BI_bittestandset64:
+    return {'s', 'q', false};
+  case Builtin::BI_interlockedbittestandreset64:
+    return {'r', 'q', /*Locked=*/true};
+  case Builtin::BI_interlockedbittestandset64:
+    return {'s', 'q', /*Locked=*/true};
+  }
+  llvm_unreachable("expected only bittest builtins");
+}
+
+static RValue EmitX86BitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+                                      const CallExpr *E, Value *BitBase,
+                                      Value *BitPos) {
+  char Action, Size;
+  bool Locked;
+  std::tie(Action, Size, Locked) = getBitTestActionSizeAndLocking(BuiltinID);
 
   // Build the assembly.
   SmallString<64> Asm;
@@ -496,12 +533,12 @@ static RValue EmitBitTestIntrinsic(CodeG
   if (Locked)
     AsmOS << "lock ";
   AsmOS << "bt";
-  if (TestAnd)
-    AsmOS << TestAnd;
+  if (Action)
+    AsmOS << Action;
   AsmOS << Size << " $2, ($1)\n\tsetc ${0:b}";
 
   // Build the constraints. FIXME: We should support immediates when possible.
-  std::string Constraints = "=r,r,r,~{cc},~{flags},~{memory},~{fpsr}";
+  std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
   llvm::IntegerType *IntType = llvm::IntegerType::get(
       CGF.getLLVMContext(),
       CGF.getContext().getTypeSize(E->getArg(1)->getType()));
@@ -515,6 +552,97 @@ static RValue EmitBitTestIntrinsic(CodeG
   return RValue::get(CS.getInstruction());
 }
 
+/// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
+/// bits and a bit position and read and optionally modify the bit at that
+/// position. The position index can be arbitrarily large, i.e. it can be larger
+/// than 31 or 63, so we need an indexed load in the general case.
+static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+                                   const CallExpr *E) {
+  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
+  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+
+  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
+  // indexing operation internally. Use them if possible.
+  llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
+  if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
+    return EmitX86BitTestIntrinsic(CGF, BuiltinID, E, BitBase, BitPos);
+
+  // Otherwise, use generic code to load one byte and test the bit. Use all but
+  // the bottom three bits as the array index, and the bottom three bits to form
+  // a mask.
+  // Bit = (BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7))) != 0;
+  Value *ByteIndex = CGF.Builder.CreateAShr(
+      BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
+  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
+  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
+                                                 ByteIndex, "bittest.byteaddr"),
+                   CharUnits::One());
+  Value *PosLow =
+      CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
+                            llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
+
+  // The updating instructions will need a mask.
+  Value *Mask = nullptr;
+  if (BuiltinID != Builtin::BI_bittest && BuiltinID != Builtin::BI_bittest64) {
+    Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
+                                 "bittest.mask");
+  }
+
+  // Emit a combined atomicrmw load/store operation for the interlocked
+  // intrinsics.
+  Value *OldByte = nullptr;
+  switch (BuiltinID) {
+  case Builtin::BI_interlockedbittestandreset:
+  case Builtin::BI_interlockedbittestandreset64:
+    OldByte = CGF.Builder.CreateAtomicRMW(
+        AtomicRMWInst::And, ByteAddr.getPointer(), CGF.Builder.CreateNot(Mask),
+        llvm::AtomicOrdering::SequentiallyConsistent);
+    break;
+  case Builtin::BI_interlockedbittestandset:
+  case Builtin::BI_interlockedbittestandset64:
+    OldByte = CGF.Builder.CreateAtomicRMW(
+        AtomicRMWInst::Or, ByteAddr.getPointer(), Mask,
+        llvm::AtomicOrdering::SequentiallyConsistent);
+    break;
+  default:
+    break;
+  }
+
+  // Emit a plain load for the non-interlocked intrinsics.
+  if (!OldByte) {
+    OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
+    Value *NewByte = nullptr;
+    switch (BuiltinID) {
+    case Builtin::BI_bittest:
+    case Builtin::BI_bittest64:
+      // Don't store anything.
+      break;
+    case Builtin::BI_bittestandcomplement:
+    case Builtin::BI_bittestandcomplement64:
+      NewByte = CGF.Builder.CreateXor(OldByte, Mask);
+      break;
+    case Builtin::BI_bittestandreset:
+    case Builtin::BI_bittestandreset64:
+      NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
+      break;
+    case Builtin::BI_bittestandset:
+    case Builtin::BI_bittestandset64:
+      NewByte = CGF.Builder.CreateOr(OldByte, Mask);
+      break;
+    default:
+      llvm_unreachable("non bittest family builtin");
+    }
+    if (NewByte)
+      CGF.Builder.CreateStore(NewByte, ByteAddr);
+  }
+
+  // However we loaded the old byte, either by plain load or atomicrmw, shift
+  // the bit into the low position and mask it to 0 or 1.
+  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
+  return RValue::get(CGF.Builder.CreateAnd(
+      ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res"));
+}
+
// Many of the MSVC builtins are available on both x64 and ARM; to avoid
// repeating code, we handle them here.
 enum class CodeGenFunction::MSVCIntrin {
@@ -2806,31 +2934,19 @@ RValue CodeGenFunction::EmitBuiltinExpr(
   case Builtin::BI_InterlockedXor:
     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
 
+  case Builtin::BI_bittest64:
   case Builtin::BI_bittest:
-    return EmitBitTestIntrinsic(*this, E, '\0', 'l');
+  case Builtin::BI_bittestandcomplement64:
   case Builtin::BI_bittestandcomplement:
-    return EmitBitTestIntrinsic(*this, E, 'c', 'l');
+  case Builtin::BI_bittestandreset64:
   case Builtin::BI_bittestandreset:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'l');
+  case Builtin::BI_bittestandset64:
   case Builtin::BI_bittestandset:
-    return EmitBitTestIntrinsic(*this, E, 's', 'l');
   case Builtin::BI_interlockedbittestandreset:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'l', /*Locked=*/true);
-  case Builtin::BI_interlockedbittestandset:
-    return EmitBitTestIntrinsic(*this, E, 's', 'l', /*Locked=*/true);
-
-  case Builtin::BI_bittest64:
-    return EmitBitTestIntrinsic(*this, E, '\0', 'q');
-  case Builtin::BI_bittestandcomplement64:
-    return EmitBitTestIntrinsic(*this, E, 'c', 'q');
-  case Builtin::BI_bittestandreset64:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'q');
-  case Builtin::BI_bittestandset64:
-    return EmitBitTestIntrinsic(*this, E, 's', 'q');
   case Builtin::BI_interlockedbittestandreset64:
-    return EmitBitTestIntrinsic(*this, E, 'r', 'q', /*Locked=*/true);
   case Builtin::BI_interlockedbittestandset64:
-    return EmitBitTestIntrinsic(*this, E, 's', 'q', /*Locked=*/true);
+  case Builtin::BI_interlockedbittestandset:
+    return EmitBitTestIntrinsic(*this, BuiltinID, E);
 
   case Builtin::BI__exception_code:
   case Builtin::BI_exception_code:

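The interlocked variants above take the atomicrmw path instead of the
plain load/store. As a rough C11 model of that path (the pointer cast
and the helper name are illustrative assumptions, not the actual
codegen):

    #include <stdatomic.h>

    /* Hypothetical C11 model of _interlockedbittestandset on non-x86:
     * a seq_cst atomic fetch-or on the single byte holding the bit. */
    unsigned char model_interlockedbittestandset(long *base, long pos) {
      _Atomic unsigned char *bytes = (_Atomic unsigned char *)base;
      unsigned char mask = (unsigned char)(1 << (pos & 0x7));
      unsigned char old = atomic_fetch_or_explicit(
          &bytes[pos >> 3], mask, memory_order_seq_cst);
      return (old >> (pos & 0x7)) & 1;
    }
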
Modified: cfe/trunk/test/CodeGen/bittest-intrin.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/bittest-intrin.c?rev=334059&r1=334058&r2=334059&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/bittest-intrin.c (original)
+++ cfe/trunk/test/CodeGen/bittest-intrin.c Tue Jun  5 18:35:08 2018
@@ -1,4 +1,6 @@
-// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s
+// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=X64
+// RUN: %clang_cc1 -fms-extensions -triple thumbv7-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=ARM
+// RUN: %clang_cc1 -fms-extensions -triple aarch64-windows-msvc %s -emit-llvm -o - | FileCheck %s --check-prefix=ARM
 
 volatile unsigned char sink = 0;
 void test32(long *base, long idx) {
@@ -18,18 +20,105 @@ void test64(__int64 *base, __int64 idx)
   sink = _interlockedbittestandset64(base, idx);
 }
 
-// CHECK-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
-// CHECK: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
-
-// CHECK-LABEL: define dso_local void @test64(i64* %base, i64 %idx)
-// CHECK: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
-// CHECK: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
+// X64: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// X64: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+
+// X64-LABEL: define dso_local void @test64(i64* %base, i64 %idx)
+// X64: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// X64: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+
+// ARM-LABEL: define dso_local {{.*}}void @test32(i32* %base, i32 %idx)
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NEWBYTE:[^ ]*]] = xor i8 %[[BYTE]], %[[MASK]]
+// ARM: store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
+// ARM: %[[NEWBYTE:[^ ]*]] = and i8 %[[BYTE]], %[[NOTMASK]]
+// ARM: store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[NEWBYTE:[^ ]*]] = or i8 %[[BYTE]], %[[MASK]]
+// ARM: store i8 %[[NEWBYTE]], i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[NOTMASK:[^ ]*]] = xor i8 %[[MASK]], -1
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw and i8* %[[BYTEADDR]], i8 %[[NOTMASK]] seq_cst
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM: %[[IDXHI:[^ ]*]] = ashr i32 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i32* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i32 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i32 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[MASK:[^ ]*]] = shl i8 1, %[[IDXLO]]
+// ARM: %[[BYTE:[^ ]*]] = atomicrmw or i8* %[[BYTEADDR]], i8 %[[MASK]] seq_cst
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ARM-LABEL: define dso_local {{.*}}void @test64(i64* %base, i64 %idx)
+// ARM: %[[IDXHI:[^ ]*]] = ashr i64 %{{.*}}, 3
+// ARM: %[[BASE:[^ ]*]] = bitcast i64* %{{.*}} to i8*
+// ARM: %[[BYTEADDR:[^ ]*]] = getelementptr inbounds i8, i8* %[[BASE]], i64 %[[IDXHI]]
+// ARM: %[[IDX8:[^ ]*]] = trunc i64 %{{.*}} to i8
+// ARM: %[[IDXLO:[^ ]*]] = and i8 %[[IDX8]], 7
+// ARM: %[[BYTE:[^ ]*]] = load i8, i8* %[[BYTEADDR]], align 1
+// ARM: %[[BYTESHR:[^ ]*]] = lshr i8 %[[BYTE]], %[[IDXLO]]
+// ARM: %[[RES:[^ ]*]] = and i8 %[[BYTESHR]], 1
+// ARM: store volatile i8 %[[RES]], i8* @sink, align 1
+
+// ... the rest is the same, but with i64 instead of i32.
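
As a concrete worked example of the generic path checked above,
_bittest(base, 35) computes a byte index of 35 >> 3 = 4 and low bits
35 & 7 = 3, so it loads ((unsigned char *)base)[4], shifts the byte
right by 3, and masks the result with 1.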
