r333978 - Reimplement the bittest intrinsic family as builtins with inline asm

Reid Kleckner via cfe-commits cfe-commits at lists.llvm.org
Mon Jun 4 18:33:40 PDT 2018


Author: rnk
Date: Mon Jun  4 18:33:40 2018
New Revision: 333978

URL: http://llvm.org/viewvc/llvm-project?rev=333978&view=rev
Log:
Reimplement the bittest intrinsic family as builtins with inline asm

We need to implement _interlockedbittestandset as a builtin for
windows.h, so we might as well do the whole family. It reduces code
duplication anyway.

Fixes PR33188, a long standing bug in our bittest implementation
encountered by Chakra.

Added:
    cfe/trunk/test/CodeGen/bittest-intrin.c
Modified:
    cfe/trunk/include/clang/Basic/Builtins.def
    cfe/trunk/lib/CodeGen/CGBuiltin.cpp
    cfe/trunk/lib/Headers/intrin.h
    cfe/trunk/test/CodeGen/ms-intrinsics-other.c
    cfe/trunk/test/CodeGen/ms-intrinsics.c

Modified: cfe/trunk/include/clang/Basic/Builtins.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/Builtins.def?rev=333978&r1=333977&r2=333978&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/Builtins.def (original)
+++ cfe/trunk/include/clang/Basic/Builtins.def Mon Jun  4 18:33:40 2018
@@ -744,6 +744,14 @@ BUILTIN(__builtin_rindex, "c*cC*i", "Fn"
 LANGBUILTIN(_alloca,          "v*z", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__annotation,     "wC*.","n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__assume,         "vb",  "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittest,                "UcNiC*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandcomplement,   "UcNi*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandreset,        "UcNi*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandset,          "UcNi*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittest64,              "UcWiC*Wi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandcomplement64, "UcWi*Wi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandreset64,      "UcWi*Wi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_bittestandset64,        "UcWi*Wi", "n", ALL_MS_LANGUAGES)
 LIBBUILTIN(_byteswap_ushort, "UsUs",     "fnc", "stdlib.h", ALL_MS_LANGUAGES)
 LIBBUILTIN(_byteswap_ulong,  "UNiUNi",   "fnc", "stdlib.h", ALL_MS_LANGUAGES)
 LIBBUILTIN(_byteswap_uint64, "ULLiULLi", "fnc", "stdlib.h", ALL_MS_LANGUAGES)
@@ -783,7 +791,10 @@ LANGBUILTIN(_InterlockedOr,   "NiNiD*Ni"
 LANGBUILTIN(_InterlockedXor8,  "ccD*c",       "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedXor16, "ssD*s",       "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(_InterlockedXor,   "NiNiD*Ni",    "n", ALL_MS_LANGUAGES)
-LANGBUILTIN(_interlockedbittestandset, "UcNiD*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_interlockedbittestandreset,   "UcNiD*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_interlockedbittestandreset64, "UcWiD*Wi", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_interlockedbittestandset,   "UcNiD*Ni", "n", ALL_MS_LANGUAGES)
+LANGBUILTIN(_interlockedbittestandset64, "UcWiD*Wi", "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__noop,           "i.",  "n", ALL_MS_LANGUAGES)
 LANGBUILTIN(__popcnt16, "UsUs",     "nc", ALL_MS_LANGUAGES)
 LANGBUILTIN(__popcnt,   "UiUi",     "nc", ALL_MS_LANGUAGES)

Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=333978&r1=333977&r2=333978&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Mon Jun  4 18:33:40 2018
@@ -484,6 +484,37 @@ CodeGenFunction::emitBuiltinObjectSize(c
   return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
 }
 
+static RValue EmitBitTestIntrinsic(CodeGenFunction &CGF, const CallExpr *E,
+                                   char TestAnd, char Size,
+                                   bool Locked = false) {
+  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
+  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
+
+  // Build the assembly.
+  SmallString<64> Asm;
+  raw_svector_ostream AsmOS(Asm);
+  if (Locked)
+    AsmOS << "lock ";
+  AsmOS << "bt";
+  if (TestAnd)
+    AsmOS << TestAnd;
+  AsmOS << Size << " $2, ($1)\n\tsetc ${0:b}";
+
+  // Build the constraints. FIXME: We should support immediates when possible.
+  std::string Constraints = "=r,r,r,~{cc},~{flags},~{memory},~{fpsr}";
+  llvm::IntegerType *IntType = llvm::IntegerType::get(
+      CGF.getLLVMContext(),
+      CGF.getContext().getTypeSize(E->getArg(1)->getType()));
+  llvm::Type *IntPtrType = IntType->getPointerTo();
+  llvm::FunctionType *FTy =
+      llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
+
+  llvm::InlineAsm *IA =
+      llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
+  CallSite CS = CGF.Builder.CreateCall(IA, {BitBase, BitPos});
+  return RValue::get(CS.getInstruction());
+}
+
 // Many of MSVC builtins are on both x64 and ARM; to avoid repeating code, we
 // handle them here.
 enum class CodeGenFunction::MSVCIntrin {
@@ -497,7 +528,6 @@ enum class CodeGenFunction::MSVCIntrin {
   _InterlockedIncrement,
   _InterlockedOr,
   _InterlockedXor,
-  _interlockedbittestandset,
   __fastfail,
 };
 
@@ -565,22 +595,6 @@ Value *CodeGenFunction::EmitMSVCBuiltinE
   case MSVCIntrin::_InterlockedXor:
     return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
 
-  case MSVCIntrin::_interlockedbittestandset: {
-    llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
-    llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
-    AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
-        AtomicRMWInst::Or, Addr,
-        Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
-        llvm::AtomicOrdering::SequentiallyConsistent);
-    // Shift the relevant bit to the least significant position, truncate to
-    // the result type, and test the low bit.
-    llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
-    llvm::Value *Truncated =
-        Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
-    return Builder.CreateAnd(Truncated,
-                             ConstantInt::get(Truncated->getType(), 1));
-  }
-
   case MSVCIntrin::_InterlockedDecrement: {
     llvm::Type *IntTy = ConvertType(E->getType());
     AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
@@ -2791,9 +2805,32 @@ RValue CodeGenFunction::EmitBuiltinExpr(
   case Builtin::BI_InterlockedXor16:
   case Builtin::BI_InterlockedXor:
     return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
+
+  case Builtin::BI_bittest:
+    return EmitBitTestIntrinsic(*this, E, '\0', 'l');
+  case Builtin::BI_bittestandcomplement:
+    return EmitBitTestIntrinsic(*this, E, 'c', 'l');
+  case Builtin::BI_bittestandreset:
+    return EmitBitTestIntrinsic(*this, E, 'r', 'l');
+  case Builtin::BI_bittestandset:
+    return EmitBitTestIntrinsic(*this, E, 's', 'l');
+  case Builtin::BI_interlockedbittestandreset:
+    return EmitBitTestIntrinsic(*this, E, 'r', 'l', /*Locked=*/true);
   case Builtin::BI_interlockedbittestandset:
-    return RValue::get(
-        EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
+    return EmitBitTestIntrinsic(*this, E, 's', 'l', /*Locked=*/true);
+
+  case Builtin::BI_bittest64:
+    return EmitBitTestIntrinsic(*this, E, '\0', 'q');
+  case Builtin::BI_bittestandcomplement64:
+    return EmitBitTestIntrinsic(*this, E, 'c', 'q');
+  case Builtin::BI_bittestandreset64:
+    return EmitBitTestIntrinsic(*this, E, 'r', 'q');
+  case Builtin::BI_bittestandset64:
+    return EmitBitTestIntrinsic(*this, E, 's', 'q');
+  case Builtin::BI_interlockedbittestandreset64:
+    return EmitBitTestIntrinsic(*this, E, 'r', 'q', /*Locked=*/true);
+  case Builtin::BI_interlockedbittestandset64:
+    return EmitBitTestIntrinsic(*this, E, 's', 'q', /*Locked=*/true);
 
   case Builtin::BI__exception_code:
   case Builtin::BI_exception_code:

Modified: cfe/trunk/lib/Headers/intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/intrin.h?rev=333978&r1=333977&r2=333978&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/intrin.h (original)
+++ cfe/trunk/lib/Headers/intrin.h Mon Jun  4 18:33:40 2018
@@ -161,13 +161,9 @@ static __inline__
 unsigned char _BitScanForward(unsigned long *_Index, unsigned long _Mask);
 static __inline__
 unsigned char _BitScanReverse(unsigned long *_Index, unsigned long _Mask);
-static __inline__
 unsigned char _bittest(long const *, long);
-static __inline__
 unsigned char _bittestandcomplement(long *, long);
-static __inline__
 unsigned char _bittestandreset(long *, long);
-static __inline__
 unsigned char _bittestandset(long *, long);
 void __cdecl _disable(void);
 void __cdecl _enable(void);
@@ -260,20 +256,15 @@ static __inline__
 unsigned char _BitScanForward64(unsigned long *_Index, unsigned __int64 _Mask);
 static __inline__
 unsigned char _BitScanReverse64(unsigned long *_Index, unsigned __int64 _Mask);
-static __inline__
 unsigned char _bittest64(__int64 const *, __int64);
-static __inline__
 unsigned char _bittestandcomplement64(__int64 *, __int64);
-static __inline__
 unsigned char _bittestandreset64(__int64 *, __int64);
-static __inline__
 unsigned char _bittestandset64(__int64 *, __int64);
 long _InterlockedAnd_np(long volatile *_Value, long _Mask);
 short _InterlockedAnd16_np(short volatile *_Value, short _Mask);
 __int64 _InterlockedAnd64_np(__int64 volatile *_Value, __int64 _Mask);
 char _InterlockedAnd8_np(char volatile *_Value, char _Mask);
 unsigned char _interlockedbittestandreset64(__int64 volatile *, __int64);
-static __inline__
 unsigned char _interlockedbittestandset64(__int64 volatile *, __int64);
 long _InterlockedCompareExchange_np(long volatile *_Destination, long _Exchange,
                                     long _Comparand);
@@ -342,78 +333,6 @@ __int64 _InterlockedAnd64(__int64 volati
 #endif
 
 /*----------------------------------------------------------------------------*\
-|* Bit Counting and Testing
-\*----------------------------------------------------------------------------*/
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittest(long const *_BitBase, long _BitPos) {
-  return (*_BitBase >> _BitPos) & 1;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandcomplement(long *_BitBase, long _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase ^ (1 << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandreset(long *_BitBase, long _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase & ~(1 << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandset(long *_BitBase, long _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase | (1 << _BitPos);
-  return _Res;
-}
-#if defined(__arm__) || defined(__aarch64__)
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset_acq(long volatile *_BitBase, long _BitPos) {
-  long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_ACQUIRE);
-  return (_PrevVal >> _BitPos) & 1;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset_nf(long volatile *_BitBase, long _BitPos) {
-  long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_RELAXED);
-  return (_PrevVal >> _BitPos) & 1;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset_rel(long volatile *_BitBase, long _BitPos) {
-  long _PrevVal = __atomic_fetch_or(_BitBase, 1l << _BitPos, __ATOMIC_RELEASE);
-  return (_PrevVal >> _BitPos) & 1;
-}
-#endif
-#ifdef __x86_64__
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittest64(__int64 const *_BitBase, __int64 _BitPos) {
-  return (*_BitBase >> _BitPos) & 1;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandcomplement64(__int64 *_BitBase, __int64 _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase ^ (1ll << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandreset64(__int64 *_BitBase, __int64 _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase & ~(1ll << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_bittestandset64(__int64 *_BitBase, __int64 _BitPos) {
-  unsigned char _Res = (*_BitBase >> _BitPos) & 1;
-  *_BitBase = *_BitBase | (1ll << _BitPos);
-  return _Res;
-}
-static __inline__ unsigned char __DEFAULT_FN_ATTRS
-_interlockedbittestandset64(__int64 volatile *_BitBase, __int64 _BitPos) {
-  long long _PrevVal =
-      __atomic_fetch_or(_BitBase, 1ll << _BitPos, __ATOMIC_SEQ_CST);
-  return (_PrevVal >> _BitPos) & 1;
-}
-#endif
-/*----------------------------------------------------------------------------*\
 |* Interlocked Exchange Add
 \*----------------------------------------------------------------------------*/
 #if defined(__arm__) || defined(__aarch64__)

Added: cfe/trunk/test/CodeGen/bittest-intrin.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/bittest-intrin.c?rev=333978&view=auto
==============================================================================
--- cfe/trunk/test/CodeGen/bittest-intrin.c (added)
+++ cfe/trunk/test/CodeGen/bittest-intrin.c Mon Jun  4 18:33:40 2018
@@ -0,0 +1,35 @@
+// RUN: %clang_cc1 -fms-extensions -triple x86_64-windows-msvc %s -emit-llvm -o - | FileCheck %s
+
+volatile unsigned char sink = 0;
+void test32(long *base, long idx) {
+  sink = _bittest(base, idx);
+  sink = _bittestandcomplement(base, idx);
+  sink = _bittestandreset(base, idx);
+  sink = _bittestandset(base, idx);
+  sink = _interlockedbittestandreset(base, idx);
+  sink = _interlockedbittestandset(base, idx);
+}
+void test64(__int64 *base, __int64 idx) {
+  sink = _bittest64(base, idx);
+  sink = _bittestandcomplement64(base, idx);
+  sink = _bittestandreset64(base, idx);
+  sink = _bittestandset64(base, idx);
+  sink = _interlockedbittestandreset64(base, idx);
+  sink = _interlockedbittestandset64(base, idx);
+}
+
+// CHECK-LABEL: define dso_local void @test32(i32* %base, i32 %idx)
+// CHECK: call i8 asm sideeffect "btl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "btcl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "lock btrl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+// CHECK: call i8 asm sideeffect "lock btsl $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i32* %{{.*}}, i32 {{.*}})
+
+// CHECK-LABEL: define dso_local void @test64(i64* %base, i64 %idx)
+// CHECK: call i8 asm sideeffect "btq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "btcq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "lock btrq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})
+// CHECK: call i8 asm sideeffect "lock btsq $2, ($1)\0A\09setc ${0:b}", "=r,r,r,~{{.*}}"(i64* %{{.*}}, i64 {{.*}})

Modified: cfe/trunk/test/CodeGen/ms-intrinsics-other.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ms-intrinsics-other.c?rev=333978&r1=333977&r2=333978&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/ms-intrinsics-other.c (original)
+++ cfe/trunk/test/CodeGen/ms-intrinsics-other.c Mon Jun  4 18:33:40 2018
@@ -148,14 +148,3 @@ LONG test_InterlockedDecrement(LONG vola
 // CHECK: [[RESULT:%[0-9]+]] = add i32 [[TMP]], -1
 // CHECK: ret i32 [[RESULT]]
 // CHECK: }
-
-unsigned char test_interlockedbittestandset(volatile LONG *ptr, LONG bit) {
-  return _interlockedbittestandset(ptr, bit);
-}
-// CHECK-LABEL: define{{.*}} i8 @test_interlockedbittestandset
-// CHECK: [[MASKBIT:%[0-9]+]] = shl i32 1, %bit
-// CHECK: [[OLD:%[0-9]+]] = atomicrmw or i32* %ptr, i32 [[MASKBIT]] seq_cst
-// CHECK: [[SHIFT:%[0-9]+]] = lshr i32 [[OLD]], %bit
-// CHECK: [[TRUNC:%[0-9]+]] = trunc i32 [[SHIFT]] to i8
-// CHECK: [[AND:%[0-9]+]] = and i8 [[TRUNC]], 1
-// CHECK: ret i8 [[AND]]

Modified: cfe/trunk/test/CodeGen/ms-intrinsics.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/ms-intrinsics.c?rev=333978&r1=333977&r2=333978&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/ms-intrinsics.c (original)
+++ cfe/trunk/test/CodeGen/ms-intrinsics.c Mon Jun  4 18:33:40 2018
@@ -455,17 +455,6 @@ __int64 test_InterlockedDecrement64(__in
 
 #endif
 
-unsigned char test_interlockedbittestandset(volatile long *ptr, long bit) {
-  return _interlockedbittestandset(ptr, bit);
-}
-// CHECK-LABEL: define{{.*}} i8 @test_interlockedbittestandset
-// CHECK: [[MASKBIT:%[0-9]+]] = shl i32 1, %bit
-// CHECK: [[OLD:%[0-9]+]] = atomicrmw or i32* %ptr, i32 [[MASKBIT]] seq_cst
-// CHECK: [[SHIFT:%[0-9]+]] = lshr i32 [[OLD]], %bit
-// CHECK: [[TRUNC:%[0-9]+]] = trunc i32 [[SHIFT]] to i8
-// CHECK: [[AND:%[0-9]+]] = and i8 [[TRUNC]], 1
-// CHECK: ret i8 [[AND]]
-
 void test__fastfail() {
   __fastfail(42);
 }




More information about the cfe-commits mailing list