[PATCH] D38046: [AtomicExpandPass][X86] set MaxAtomicSizeInBitsSupported according to the target
Wei Mi via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 19 11:23:16 PDT 2017
wmi created this revision.
Herald added a subscriber: sanjoy.
Currently, MaxAtomicSizeInBitsSupported is set to 1024, which leads to __sync_* libcalls for 128-bit atomic load/store/cmpxchg (PR31620). The patch is extracted from https://reviews.llvm.org/D18201 to solve the problem so that https://reviews.llvm.org/rL312830 can be reenabled (it was reverted because of PR31620).
Repository:
rL LLVM
https://reviews.llvm.org/D38046
Files:
lib/Target/X86/X86ISelLowering.cpp
test/CodeGen/X86/atomic-non-integer.ll
test/CodeGen/X86/nocx16.ll
Index: test/CodeGen/X86/nocx16.ll
===================================================================
--- test/CodeGen/X86/nocx16.ll
+++ test/CodeGen/X86/nocx16.ll
@@ -1,21 +1,21 @@
; RUN: llc < %s -mtriple=x86_64-- -mcpu=corei7 -mattr=-cx16 | FileCheck %s
define void @test(i128* %a) nounwind {
entry:
-; CHECK: __sync_val_compare_and_swap_16
+; CHECK: __atomic_compare_exchange_16
%0 = cmpxchg i128* %a, i128 1, i128 1 seq_cst seq_cst
-; CHECK: __sync_lock_test_and_set_16
+; CHECK: __atomic_exchange_16
%1 = atomicrmw xchg i128* %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_add_16
+; CHECK: __atomic_fetch_add_16
%2 = atomicrmw add i128* %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_sub_16
+; CHECK: __atomic_fetch_sub_16
%3 = atomicrmw sub i128* %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_and_16
+; CHECK: __atomic_fetch_and_16
%4 = atomicrmw and i128* %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_nand_16
+; CHECK: __atomic_fetch_nand_16
%5 = atomicrmw nand i128* %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_or_16
+; CHECK: __atomic_fetch_or_16
%6 = atomicrmw or i128* %a, i128 1 seq_cst
-; CHECK: __sync_fetch_and_xor_16
+; CHECK: __atomic_fetch_xor_16
%7 = atomicrmw xor i128* %a, i128 1 seq_cst
ret void
}
Index: test/CodeGen/X86/atomic-non-integer.ll
===================================================================
--- test/CodeGen/X86/atomic-non-integer.ll
+++ test/CodeGen/X86/atomic-non-integer.ll
@@ -34,7 +34,7 @@
define void @store_fp128(fp128* %fptr, fp128 %v) {
; CHECK-LABEL: @store_fp128
-; CHECK: callq __sync_lock_test_and_set_16
+; CHECK: callq __atomic_store_16
store atomic fp128 %v, fp128* %fptr unordered, align 16
ret void
}
@@ -66,7 +66,7 @@
define fp128 @load_fp128(fp128* %fptr) {
; CHECK-LABEL: @load_fp128
-; CHECK: callq __sync_val_compare_and_swap_16
+; CHECK: callq __atomic_load_16
%v = load atomic fp128, fp128* %fptr unordered, align 16
ret fp128 %v
}
Index: lib/Target/X86/X86ISelLowering.cpp
===================================================================
--- lib/Target/X86/X86ISelLowering.cpp
+++ lib/Target/X86/X86ISelLowering.cpp
@@ -112,6 +112,17 @@
// X86-SSE is even stranger. It uses -1 or 0 for vector masks.
setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
+ if (Subtarget.is64Bit()) {
+ if (Subtarget.hasCmpxchg16b())
+ setMaxAtomicSizeInBitsSupported(128);
+ else
+ setMaxAtomicSizeInBitsSupported(64);
+ } else {
+ // FIXME: Check that we actually have cmpxchg (i486 or later)
+ // FIXME: Check that we actually have cmpxchg8b (i586 or later)
+ setMaxAtomicSizeInBitsSupported(64);
+ }
+
// For 64-bit, since we have so many registers, use the ILP scheduler.
// For 32-bit, use the register pressure specific scheduling.
// For Atom, always use ILP scheduling.
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D38046.115869.patch
Type: text/x-patch
Size: 2838 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20170919/de0a2ddd/attachment.bin>
More information about the llvm-commits
mailing list