[llvm] [X86] Use fence(seq_cst) in IdempotentRMWIntoFencedLoad (PR #126521)
Valentin Churavy via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 6 07:28:15 PST 2025
================
@@ -0,0 +1,656 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=X64
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mattr=+sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SSE2
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=slm -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=goldmont -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=knl -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-GENERIC,X86-SLM
+; RUN: llc < %s -mtriple=i686-- -verify-machineinstrs -mcpu=atom -mattr=-sse2 | FileCheck %s --check-prefixes=X86,X86-ATOM
+
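+; On x86, an atomic rmw operation that does not modify the value in memory
+; (such as atomic add 0) can be replaced by an mfence followed by a mov.
+; With syncscope("singlethread"), as in add8, or16 and xor32 below, the fence
+; shows up in the expected output as #MEMBARRIER rather than as an mfence.
+; The optimization, and the motivation for it, is explained in
+; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf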
+
+; add8 - idempotent 8-bit "atomicrmw add 0" with syncscope("singlethread") and
+; monotonic ordering. Every run line expects the RMW to become #MEMBARRIER
+; followed by a zero-extending byte load (movzbl) of the pointee; the Atom
+; run additionally expects four nops between the load and the return.
+define i8 @add8(ptr %p) {
+; X64-LABEL: add8:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: add8:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: movzbl (%eax), %eax
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: add8:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+ %1 = atomicrmw add ptr %p, i8 0 syncscope("singlethread") monotonic
+ ret i8 %1
+}
+
+; or16 - idempotent 16-bit "atomicrmw or 0" with syncscope("singlethread") and
+; acquire ordering. Every run line expects the RMW to become #MEMBARRIER
+; followed by a zero-extending 16-bit load (movzwl) of the pointee; the Atom
+; run additionally expects four nops between the load and the return.
+define i16 @or16(ptr %p) {
+; X64-LABEL: or16:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: movzwl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: or16:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: movzwl (%eax), %eax
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: or16:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: movzwl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+ %1 = atomicrmw or ptr %p, i16 0 syncscope("singlethread") acquire
+ ret i16 %1
+}
+
+; xor32 - idempotent 32-bit "atomicrmw xor 0" with syncscope("singlethread")
+; and release ordering. Every run line expects the RMW to become #MEMBARRIER
+; followed by a plain 32-bit load (movl) of the pointee; the Atom run
+; additionally expects four nops between the load and the return.
+define i32 @xor32(ptr %p) {
+; X64-LABEL: xor32:
+; X64: # %bb.0:
+; X64-NEXT: #MEMBARRIER
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: retq
+;
+; X86-GENERIC-LABEL: xor32:
+; X86-GENERIC: # %bb.0:
+; X86-GENERIC-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-GENERIC-NEXT: #MEMBARRIER
+; X86-GENERIC-NEXT: movl (%eax), %eax
+; X86-GENERIC-NEXT: retl
+;
+; X86-ATOM-LABEL: xor32:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: #MEMBARRIER
+; X86-ATOM-NEXT: movl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
+ %1 = atomicrmw xor ptr %p, i32 0 syncscope("singlethread") release
+ ret i32 %1
+}
+
+define i64 @sub64(ptr %p) {
----------------
vchuravy wrote:
Done!
https://github.com/llvm/llvm-project/pull/126521
More information about the llvm-commits
mailing list