[llvm] [AtomicExpandPass] Add umin, smin, umax, smax to isIdempotentRMW (PR #142277)
via llvm-commits
llvm-commits at lists.llvm.org
Sat May 31 08:36:05 PDT 2025
https://github.com/AZero13 created https://github.com/llvm/llvm-project/pull/142277
None
>From 169de7648c3346b6334e5edbdfdcd525a060b78c Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 31 May 2025 10:55:07 -0400
Subject: [PATCH 1/2] Pre-commit tests (NFC)
---
llvm/test/CodeGen/X86/atomic-idempotent.ll | 186 ++++++++++++++++++
llvm/test/Transforms/InstCombine/atomicrmw.ll | 20 ++
2 files changed, 206 insertions(+)
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index 020f9eb793102..4081f0b6c3937 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -622,4 +622,190 @@ define void @or8_nouse_seq_cst(ptr %p) #0 {
ret void
}
+define void @atomic_umin_uint_max(ptr %addr) {
+; CHECK-LABEL: @atomic_umin_uint_max(
+; CHECK-NEXT: atomicrmw or ptr [[ADDR:%.*]], i32 0 seq_cst, align 4
+; CHECK-NEXT: ret void
+;
+; X64-LABEL: atomic_umin_uint_max:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: .p2align 4
+; X64-NEXT: .LBB15_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: lock cmpxchgl %eax, (%rdi)
+; X64-NEXT: jne .LBB15_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: retq
+;
+; X86-LABEL: atomic_umin_uint_max:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %eax
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB15_1: # %atomicrmw.start
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-NEXT: jne .LBB15_1
+; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: retl
+ atomicrmw umin ptr %addr, i32 -1 seq_cst
+ ret void
+}
+
+define void @atomic_umax_zero(ptr %addr) {
+; CHECK-LABEL: @atomic_umax_zero(
+; CHECK-NEXT: atomicrmw or ptr [[ADDR:%.*]], i32 0 seq_cst, align 4
+; CHECK-NEXT: ret void
+;
+; X64-LABEL: atomic_umax_zero:
+; X64: # %bb.0:
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: .p2align 4
+; X64-NEXT: .LBB16_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: lock cmpxchgl %eax, (%rdi)
+; X64-NEXT: jne .LBB16_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: retq
+;
+; X86-LABEL: atomic_umax_zero:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl (%ecx), %eax
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB16_1: # %atomicrmw.start
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: lock cmpxchgl %eax, (%ecx)
+; X86-NEXT: jne .LBB16_1
+; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: retl
+ atomicrmw umax ptr %addr, i32 0 seq_cst
+ ret void
+}
+
+define void @atomic_min_smax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_smax_char(
+; CHECK-NEXT: atomicrmw or ptr [[ADDR:%.*]], i8 0 seq_cst, align 1
+; CHECK-NEXT: ret void
+;
+; X64-LABEL: atomic_min_smax_char:
+; X64: # %bb.0:
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: .p2align 4
+; X64-NEXT: .LBB17_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: lock cmpxchgb %al, (%rdi)
+; X64-NEXT: jne .LBB17_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: retq
+;
+; X86-LABEL: atomic_min_smax_char:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl (%ecx), %eax
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB17_1: # %atomicrmw.start
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: lock cmpxchgb %al, (%ecx)
+; X86-NEXT: jne .LBB17_1
+; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: retl
+ atomicrmw min ptr %addr, i8 127 seq_cst
+ ret void
+}
+
+define void @atomic_max_smin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_smin_char(
+; CHECK-NEXT: atomicrmw or ptr [[ADDR:%.*]], i8 0 seq_cst, align 1
+; CHECK-NEXT: ret void
+;
+; X64-LABEL: atomic_max_smin_char:
+; X64: # %bb.0:
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: .p2align 4
+; X64-NEXT: .LBB18_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: lock cmpxchgb %al, (%rdi)
+; X64-NEXT: jne .LBB18_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: retq
+;
+; X86-LABEL: atomic_max_smin_char:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl (%ecx), %eax
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB18_1: # %atomicrmw.start
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: lock cmpxchgb %al, (%ecx)
+; X86-NEXT: jne .LBB18_1
+; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: retl
+ atomicrmw max ptr %addr, i8 -128 seq_cst
+ ret void
+}
+
+define void @atomic_min_umax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_umax_char(
+; CHECK-NEXT: atomicrmw or ptr [[ADDR:%.*]], i8 0 seq_cst, align 1
+; CHECK-NEXT: ret void
+;
+; X64-LABEL: atomic_min_umax_char:
+; X64: # %bb.0:
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: .p2align 4
+; X64-NEXT: .LBB19_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: lock cmpxchgb %al, (%rdi)
+; X64-NEXT: jne .LBB19_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: retq
+;
+; X86-LABEL: atomic_min_umax_char:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl (%ecx), %eax
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB19_1: # %atomicrmw.start
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: lock cmpxchgb %al, (%ecx)
+; X86-NEXT: jne .LBB19_1
+; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: retl
+ atomicrmw umin ptr %addr, i8 255 seq_cst
+ ret void
+}
+
+define void @atomic_max_umin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_umin_char(
+; CHECK-NEXT: atomicrmw or ptr [[ADDR:%.*]], i8 0 seq_cst, align 1
+; CHECK-NEXT: ret void
+;
+; X64-LABEL: atomic_max_umin_char:
+; X64: # %bb.0:
+; X64-NEXT: movzbl (%rdi), %eax
+; X64-NEXT: .p2align 4
+; X64-NEXT: .LBB20_1: # %atomicrmw.start
+; X64-NEXT: # =>This Inner Loop Header: Depth=1
+; X64-NEXT: lock cmpxchgb %al, (%rdi)
+; X64-NEXT: jne .LBB20_1
+; X64-NEXT: # %bb.2: # %atomicrmw.end
+; X64-NEXT: retq
+;
+; X86-LABEL: atomic_max_umin_char:
+; X86: # %bb.0:
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movzbl (%ecx), %eax
+; X86-NEXT: .p2align 4
+; X86-NEXT: .LBB20_1: # %atomicrmw.start
+; X86-NEXT: # =>This Inner Loop Header: Depth=1
+; X86-NEXT: lock cmpxchgb %al, (%ecx)
+; X86-NEXT: jne .LBB20_1
+; X86-NEXT: # %bb.2: # %atomicrmw.end
+; X86-NEXT: retl
+ atomicrmw umax ptr %addr, i8 0 seq_cst
+ ret void
+}
+
attributes #0 = { nounwind }
diff --git a/llvm/test/Transforms/InstCombine/atomicrmw.ll b/llvm/test/Transforms/InstCombine/atomicrmw.ll
index ca5ffd110ad61..b6c0e1e810f96 100644
--- a/llvm/test/Transforms/InstCombine/atomicrmw.ll
+++ b/llvm/test/Transforms/InstCombine/atomicrmw.ll
@@ -85,6 +85,26 @@ define i8 @atomic_max_smin_char(ptr %addr) {
ret i8 %res
}
+; Idempotent atomicrmw are still canonicalized.
+define i8 @atomic_min_umax_char(ptr %addr) {
+; CHECK-LABEL: @atomic_min_umax_char(
+; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %res = atomicrmw umin ptr %addr, i8 255 monotonic
+ ret i8 %res
+}
+
+; Idempotent atomicrmw are still canonicalized.
+define i8 @atomic_max_umin_char(ptr %addr) {
+; CHECK-LABEL: @atomic_max_umin_char(
+; CHECK-NEXT: [[RES:%.*]] = atomicrmw or ptr [[ADDR:%.*]], i8 0 monotonic, align 1
+; CHECK-NEXT: ret i8 [[RES]]
+;
+ %res = atomicrmw umax ptr %addr, i8 0 monotonic
+ ret i8 %res
+}
+
; Idempotent atomicrmw are still canonicalized.
define float @atomic_fsub_zero(ptr %addr) {
; CHECK-LABEL: @atomic_fsub_zero(
>From 7d9591267d59c83aefd403f5f4836745d85c93b7 Mon Sep 17 00:00:00 2001
From: Rose <gfunni234 at gmail.com>
Date: Sat, 31 May 2025 10:31:30 -0400
Subject: [PATCH 2/2] Add umin, smin, umax, smax to isIdempotentRMW
---
llvm/lib/CodeGen/AtomicExpandPass.cpp | 8 +-
llvm/test/CodeGen/X86/atomic-idempotent.ll | 240 ++++++++++++---------
2 files changed, 145 insertions(+), 103 deletions(-)
diff --git a/llvm/lib/CodeGen/AtomicExpandPass.cpp b/llvm/lib/CodeGen/AtomicExpandPass.cpp
index c376de877ac7d..2b86bed152d7c 100644
--- a/llvm/lib/CodeGen/AtomicExpandPass.cpp
+++ b/llvm/lib/CodeGen/AtomicExpandPass.cpp
@@ -1580,10 +1580,16 @@ bool AtomicExpandImpl::isIdempotentRMW(AtomicRMWInst *RMWI) {
case AtomicRMWInst::Sub:
case AtomicRMWInst::Or:
case AtomicRMWInst::Xor:
+ case AtomicRMWInst::UMax: // umax(x, 0) == x
return C->isZero();
case AtomicRMWInst::And:
return C->isMinusOne();
- // FIXME: we could also treat Min/Max/UMin/UMax by the INT_MIN/INT_MAX/...
+ case AtomicRMWInst::Max: // max(x, INT_MIN) == x
+ return C->isMinValue(/*isSigned=*/true);
+ case AtomicRMWInst::Min: // min(x, INT_MAX) == x
+ return C->isMaxValue(/*isSigned=*/true);
+ case AtomicRMWInst::UMin: // umin(x, UINT_MAX) == x
+ return C->isMaxValue(/*isSigned=*/false);
default:
return false;
}
diff --git a/llvm/test/CodeGen/X86/atomic-idempotent.ll b/llvm/test/CodeGen/X86/atomic-idempotent.ll
index 4081f0b6c3937..db3dedb6f7ac0 100644
--- a/llvm/test/CodeGen/X86/atomic-idempotent.ll
+++ b/llvm/test/CodeGen/X86/atomic-idempotent.ll
@@ -629,26 +629,32 @@ define void @atomic_umin_uint_max(ptr %addr) {
;
; X64-LABEL: atomic_umin_uint_max:
; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB15_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: lock cmpxchgl %eax, (%rdi)
-; X64-NEXT: jne .LBB15_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: retq
;
-; X86-LABEL: atomic_umin_uint_max:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %eax
-; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB15_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: lock cmpxchgl %eax, (%ecx)
-; X86-NEXT: jne .LBB15_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: retl
+; X86-SSE2-LABEL: atomic_umin_uint_max:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_umin_uint_max:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_umin_uint_max:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw umin ptr %addr, i32 -1 seq_cst
ret void
}
@@ -660,26 +666,32 @@ define void @atomic_umax_zero(ptr %addr) {
;
; X64-LABEL: atomic_umax_zero:
; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movl (%rdi), %eax
-; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB16_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: lock cmpxchgl %eax, (%rdi)
-; X64-NEXT: jne .LBB16_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: retq
;
-; X86-LABEL: atomic_umax_zero:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl (%ecx), %eax
-; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB16_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: lock cmpxchgl %eax, (%ecx)
-; X86-NEXT: jne .LBB16_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: retl
+; X86-SSE2-LABEL: atomic_umax_zero:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_umax_zero:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_umax_zero:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw umax ptr %addr, i32 0 seq_cst
ret void
}
@@ -691,26 +703,32 @@ define void @atomic_min_smax_char(ptr %addr) {
;
; X64-LABEL: atomic_min_smax_char:
; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB17_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: lock cmpxchgb %al, (%rdi)
-; X64-NEXT: jne .LBB17_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: retq
;
-; X86-LABEL: atomic_min_smax_char:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl (%ecx), %eax
-; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB17_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: lock cmpxchgb %al, (%ecx)
-; X86-NEXT: jne .LBB17_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: retl
+; X86-SSE2-LABEL: atomic_min_smax_char:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzbl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_min_smax_char:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movzbl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_min_smax_char:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw min ptr %addr, i8 127 seq_cst
ret void
}
@@ -722,26 +740,32 @@ define void @atomic_max_smin_char(ptr %addr) {
;
; X64-LABEL: atomic_max_smin_char:
; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB18_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: lock cmpxchgb %al, (%rdi)
-; X64-NEXT: jne .LBB18_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: retq
;
-; X86-LABEL: atomic_max_smin_char:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl (%ecx), %eax
-; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB18_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: lock cmpxchgb %al, (%ecx)
-; X86-NEXT: jne .LBB18_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: retl
+; X86-SSE2-LABEL: atomic_max_smin_char:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzbl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_max_smin_char:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movzbl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_max_smin_char:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw max ptr %addr, i8 -128 seq_cst
ret void
}
@@ -753,26 +777,32 @@ define void @atomic_min_umax_char(ptr %addr) {
;
; X64-LABEL: atomic_min_umax_char:
; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB19_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: lock cmpxchgb %al, (%rdi)
-; X64-NEXT: jne .LBB19_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: retq
;
-; X86-LABEL: atomic_min_umax_char:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl (%ecx), %eax
-; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB19_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: lock cmpxchgb %al, (%ecx)
-; X86-NEXT: jne .LBB19_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: retl
+; X86-SSE2-LABEL: atomic_min_umax_char:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzbl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_min_umax_char:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movzbl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_min_umax_char:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw umin ptr %addr, i8 255 seq_cst
ret void
}
@@ -784,26 +814,32 @@ define void @atomic_max_umin_char(ptr %addr) {
;
; X64-LABEL: atomic_max_umin_char:
; X64: # %bb.0:
+; X64-NEXT: lock orl $0, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl (%rdi), %eax
-; X64-NEXT: .p2align 4
-; X64-NEXT: .LBB20_1: # %atomicrmw.start
-; X64-NEXT: # =>This Inner Loop Header: Depth=1
-; X64-NEXT: lock cmpxchgb %al, (%rdi)
-; X64-NEXT: jne .LBB20_1
-; X64-NEXT: # %bb.2: # %atomicrmw.end
; X64-NEXT: retq
;
-; X86-LABEL: atomic_max_umin_char:
-; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl (%ecx), %eax
-; X86-NEXT: .p2align 4
-; X86-NEXT: .LBB20_1: # %atomicrmw.start
-; X86-NEXT: # =>This Inner Loop Header: Depth=1
-; X86-NEXT: lock cmpxchgb %al, (%ecx)
-; X86-NEXT: jne .LBB20_1
-; X86-NEXT: # %bb.2: # %atomicrmw.end
-; X86-NEXT: retl
+; X86-SSE2-LABEL: atomic_max_umin_char:
+; X86-SSE2: # %bb.0:
+; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SSE2-NEXT: mfence
+; X86-SSE2-NEXT: movzbl (%eax), %eax
+; X86-SSE2-NEXT: retl
+;
+; X86-SLM-LABEL: atomic_max_umin_char:
+; X86-SLM: # %bb.0:
+; X86-SLM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-SLM-NEXT: lock orl $0, (%esp)
+; X86-SLM-NEXT: movzbl (%eax), %eax
+; X86-SLM-NEXT: retl
+;
+; X86-ATOM-LABEL: atomic_max_umin_char:
+; X86-ATOM: # %bb.0:
+; X86-ATOM-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-ATOM-NEXT: lock orl $0, (%esp)
+; X86-ATOM-NEXT: movzbl (%eax), %eax
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: nop
+; X86-ATOM-NEXT: retl
atomicrmw umax ptr %addr, i8 0 seq_cst
ret void
}
More information about the llvm-commits
mailing list