[llvm] lock opt ptr const inconsistencies x86 only (PR #185195)
Takashi Idobe via llvm-commits
llvm-commits at lists.llvm.org
Sat Mar 7 07:13:15 PST 2026
https://github.com/Takashiidobe created https://github.com/llvm/llvm-project/pull/185195
Resolves: https://github.com/llvm/llvm-project/issues/147280
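For context, a minimal sketch of the fold this PR teaches the backend to
recognise (mirroring the tests added below). Before InstCombine runs, the
intermediate arithmetic is still present:

  %old = atomicrmw and ptr %p, i64 -2 seq_cst, align 8
  %new = and i64 %old, -2
  %r   = icmp eq i64 %new, 0

InstCombine simplifies the 'and' away and rewrites the compare against the
old value directly:

  %old = atomicrmw and ptr %p, i64 -2 seq_cst, align 8
  %r   = icmp ult i64 %old, 2

The backend previously matched only the first shape, so the folded form fell
back to a cmpxchg loop instead of a single lock instruction + setcc.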
From 2622138c19a92c478a5a157d405cc7e911a92604 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Fri, 6 Mar 2026 09:37:22 -0500
Subject: [PATCH 1/3] refactor x86 emitCmpArithAtomicRMWIntrinsic and handle
cases where instcombine turned binop + icmp into just icmp
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 154 +++++++++++++++++-------
1 file changed, 113 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6bb558f4ef6da..a1b9db55c3132 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32473,68 +32473,155 @@ void X86TargetLowering::emitBitTestAtomicRMWIntrinsic(AtomicRMWInst *AI) const {
AI->eraseFromParent();
}
-static bool shouldExpandCmpArithRMWInIR(const AtomicRMWInst *AI) {
+/// Return the X86 condition code for a CmpArith atomic RMW pattern that can
+/// be lowered to a lock instruction + setcc, or COND_INVALID if the pattern
+/// is not recognised.
+///
+/// Handles both the "natural" form where the intermediate arithmetic is still
+/// present as a separate instruction, and the InstCombine-folded form where
+/// the intermediate was simplified away and the icmp compares the old value
+/// directly.
+static X86::CondCode getCmpArithCC(const AtomicRMWInst *AI) {
using namespace llvm::PatternMatch;
if (!AI->hasOneUse())
- return false;
+ return X86::COND_INVALID;
Value *Op = AI->getOperand(1);
CmpPredicate Pred;
const Instruction *I = AI->user_back();
AtomicRMWInst::BinOp Opc = AI->getOperation();
+
if (Opc == AtomicRMWInst::Add) {
- if (match(I, m_c_ICmp(Pred, m_Sub(m_ZeroInt(), m_Specific(Op)), m_Value())))
- return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
+ // InstCombine-folded form: icmp eq (old + Op), 0 -> icmp eq old, (0 - Op)
+ // lock add sets ZF on the new value; old + Op == 0 <=> old == -Op [ZF]
+ if (match(I,
+ m_c_ICmp(Pred, m_Sub(m_ZeroInt(), m_Specific(Op)), m_Value()))) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return X86::COND_E;
+ if (Pred == CmpInst::ICMP_NE)
+ return X86::COND_NE;
+ }
+ // Non-folded form: %new = add %old, Op; icmp slt/sgt %new, 0/-1
+ // lock add sets SF on the new value directly.
if (match(I, m_OneUse(m_c_Add(m_Specific(Op), m_Value())))) {
if (match(I->user_back(),
m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
- return true;
+ return X86::COND_S;
if (match(I->user_back(),
m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
- return true;
+ return X86::COND_NS;
}
- return false;
+ return X86::COND_INVALID;
}
if (Opc == AtomicRMWInst::Sub) {
- if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
- return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
+ // InstCombine-folded form: icmp eq (old - Op), 0 -> icmp eq old, Op
+ // lock sub sets ZF on the new value; old - Op == 0 <=> old == Op [ZF]
+ if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value()))) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return X86::COND_E;
+ if (Pred == CmpInst::ICMP_NE)
+ return X86::COND_NE;
+ }
+ // Non-folded form: %new = sub %old, Op; icmp slt/sgt %new, 0/-1
+ // lock sub sets SF on the new value directly.
if (match(I, m_OneUse(m_Sub(m_Value(), m_Specific(Op))))) {
if (match(I->user_back(),
m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
- return true;
+ return X86::COND_S;
if (match(I->user_back(),
m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
- return true;
+ return X86::COND_NS;
}
- return false;
+ return X86::COND_INVALID;
}
+ // Non-folded form for Or/And: %new = or/and %old, Op; icmp P %new, 0/-1
+ // lock or/and sets ZF/SF on the new value directly.
if ((Opc == AtomicRMWInst::Or &&
match(I, m_OneUse(m_c_Or(m_Specific(Op), m_Value())))) ||
(Opc == AtomicRMWInst::And &&
match(I, m_OneUse(m_c_And(m_Specific(Op), m_Value()))))) {
- if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt())))
- return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE ||
- Pred == CmpInst::ICMP_SLT;
+ if (match(I->user_back(), m_ICmp(Pred, m_Value(), m_ZeroInt()))) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return X86::COND_E;
+ if (Pred == CmpInst::ICMP_NE)
+ return X86::COND_NE;
+ if (Pred == CmpInst::ICMP_SLT)
+ return X86::COND_S;
+ }
if (match(I->user_back(),
m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
- return true;
- return false;
+ return X86::COND_NS;
+ return X86::COND_INVALID;
+ }
+ if (Opc == AtomicRMWInst::And) {
+ // InstCombine-folded form: the intermediate 'and' was simplified away,
+ // leaving an icmp directly on the old value. The lock and instruction
+ // still sets flags on (old & C), so we recover the right CC algebraically:
+  //   icmp ult old, -C <=> (old & C) == 0 [ZF=1] (needs -C a power of 2)
+  //   icmp ugt old, ~C <=> (old & C) != 0 [ZF=0] (needs -C a power of 2)
+ // When C has the sign bit set, bit-63 is preserved by the AND, so:
+ // icmp slt old, 0 <=> (old & C) < 0 [SF=1]
+ // icmp sgt old, -1 <=> (old & C) >= 0 [SF=0]
+ auto *CI = dyn_cast<ConstantInt>(Op);
+ if (!CI)
+ return X86::COND_INVALID;
+ const APInt &C = CI->getValue();
+ const APInt *K;
+ if (match(I, m_c_ICmp(Pred, m_Specific(AI), m_APInt(K)))) {
+ if (Pred == ICmpInst::ICMP_ULT && *K == -C)
+ return X86::COND_E;
+ if (Pred == ICmpInst::ICMP_UGT && *K == ~C)
+ return X86::COND_NE;
+ if (C.isNegative()) {
+ if (Pred == ICmpInst::ICMP_SLT && K->isZero())
+ return X86::COND_S;
+ if (Pred == ICmpInst::ICMP_SGT && K->isAllOnes())
+ return X86::COND_NS;
+ }
+ }
+ return X86::COND_INVALID;
}
if (Opc == AtomicRMWInst::Xor) {
- if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value())))
- return Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE;
+ // InstCombine-folded form: icmp eq (old ^ Op), 0 -> icmp eq old, Op
+ // x ^ c == 0 <=> x == c, so lock xor ZF=1 <=> old == Op [ZF]
+ if (match(I, m_c_ICmp(Pred, m_Specific(Op), m_Value()))) {
+ if (Pred == CmpInst::ICMP_EQ)
+ return X86::COND_E;
+ if (Pred == CmpInst::ICMP_NE)
+ return X86::COND_NE;
+ }
+ // Non-folded form: %new = xor %old, Op; icmp slt/sgt %new, 0/-1
+ // lock xor sets SF on the new value directly.
if (match(I, m_OneUse(m_c_Xor(m_Specific(Op), m_Value())))) {
if (match(I->user_back(),
m_SpecificICmp(CmpInst::ICMP_SLT, m_Value(), m_ZeroInt())))
- return true;
+ return X86::COND_S;
if (match(I->user_back(),
m_SpecificICmp(CmpInst::ICMP_SGT, m_Value(), m_AllOnes())))
- return true;
+ return X86::COND_NS;
}
- return false;
+ // InstCombine-folded form: when C has the sign bit set, XOR flips bit-63
+ // so SF(old ^ C) = !SF(old). The folded predicates are therefore inverted
+ // relative to the non-folded ones above:
+ // icmp sgt old, -1 <=> (old ^ C) < 0 [SF=1]
+ // icmp slt old, 0 <=> (old ^ C) >= 0 [SF=0]
+ if (auto *CI = dyn_cast<ConstantInt>(Op);
+ CI && CI->getValue().isNegative()) {
+ if (match(I, m_c_ICmp(Pred, m_Specific(AI), m_AllOnes())) &&
+ Pred == ICmpInst::ICMP_SGT)
+ return X86::COND_S;
+ if (match(I, m_c_ICmp(Pred, m_Specific(AI), m_ZeroInt())) &&
+ Pred == ICmpInst::ICMP_SLT)
+ return X86::COND_NS;
+ }
+ return X86::COND_INVALID;
}
- return false;
+ return X86::COND_INVALID;
+}
+
+static bool shouldExpandCmpArithRMWInIR(const AtomicRMWInst *AI) {
+ return getCmpArithCC(AI) != X86::COND_INVALID;
}
void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
@@ -32549,24 +32636,9 @@ void X86TargetLowering::emitCmpArithAtomicRMWIntrinsic(
assert(TempI->hasOneUse() && "Must have one use");
ICI = cast<ICmpInst>(TempI->user_back());
}
- X86::CondCode CC = X86::COND_INVALID;
- ICmpInst::Predicate Pred = ICI->getPredicate();
- switch (Pred) {
- default:
- llvm_unreachable("Not supported Pred");
- case CmpInst::ICMP_EQ:
- CC = X86::COND_E;
- break;
- case CmpInst::ICMP_NE:
- CC = X86::COND_NE;
- break;
- case CmpInst::ICMP_SLT:
- CC = X86::COND_S;
- break;
- case CmpInst::ICMP_SGT:
- CC = X86::COND_NS;
- break;
- }
+ X86::CondCode CC = getCmpArithCC(AI);
+ assert(CC != X86::COND_INVALID && "emitCmpArithAtomicRMWIntrinsic called "
+ "without a recognised pattern");
Intrinsic::ID IID = Intrinsic::not_intrinsic;
switch (AI->getOperation()) {
default:
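As a sanity check on the algebra in the folded-AND comments above, the
equivalences can be verified exhaustively at i8 width. This is a standalone
illustration in plain C++ (not part of the patch) for C = -2, where -C = 2
is a power of two:

  #include <cassert>
  #include <cstdint>

  int main() {
    const int8_t C = -2;
    for (int V = 0; V < 256; ++V) {
      int8_t Old = (int8_t)V;
      int8_t New = (int8_t)(Old & C); // what lock and sets flags on
      // ZF forms: (old & C) == 0 <=> old ult -C; (old & C) != 0 <=> old ugt ~C
      assert((New == 0) == ((uint8_t)Old < (uint8_t)-C));
      assert((New != 0) == ((uint8_t)Old > (uint8_t)~C));
      // SF forms: C has the sign bit set, so AND preserves old's sign bit
      assert((New < 0) == (Old < 0));
      assert((New >= 0) == (Old > -1));
    }
    return 0;
  }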
From 95fbffc36334d34bcab53d553ba7907cd61147f8 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Fri, 6 Mar 2026 09:59:47 -0500
Subject: [PATCH 2/3] add test cases
---
.../X86/atomic-lock-setcc-instcombine.ll | 79 +++++++++++++++++++
1 file changed, 79 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/atomic-lock-setcc-instcombine.ll
diff --git a/llvm/test/CodeGen/X86/atomic-lock-setcc-instcombine.ll b/llvm/test/CodeGen/X86/atomic-lock-setcc-instcombine.ll
new file mode 100644
index 0000000000000..968d787999cbe
--- /dev/null
+++ b/llvm/test/CodeGen/X86/atomic-lock-setcc-instcombine.ll
@@ -0,0 +1,79 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s
+
+; InstCombine folds icmp(binop(atomicrmw, C), 0) into a direct icmp on the
+; old value, removing the intermediate arithmetic instruction. The X86
+; backend must recognise these folded forms in shouldExpandAtomicRMWInIR and
+; emit a lock instruction + setcc rather than a cmpxchg loop.
+
+; icmp eq (old & -2), 0 -> icmp ult old, 2
+define i1 @lock_and_sete_folded(ptr %0) nounwind {
+; CHECK-LABEL: lock_and_sete_folded:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lock andq $-2, (%rdi)
+; CHECK-NEXT: sete %al
+; CHECK-NEXT: retq
+ %2 = atomicrmw and ptr %0, i64 -2 seq_cst, align 8
+ %3 = icmp ult i64 %2, 2
+ ret i1 %3
+}
+
+; icmp ne (old & -2), 0 -> icmp ugt old, 1
+define i1 @lock_and_setne_folded(ptr %0) nounwind {
+; CHECK-LABEL: lock_and_setne_folded:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lock andq $-2, (%rdi)
+; CHECK-NEXT: setne %al
+; CHECK-NEXT: retq
+ %2 = atomicrmw and ptr %0, i64 -2 seq_cst, align 8
+ %3 = icmp ugt i64 %2, 1
+ ret i1 %3
+}
+
+; icmp slt (old & -2), 0 -> icmp slt old, 0 (C has sign bit; AND preserves it)
+define i1 @lock_and_sets_folded(ptr %0) nounwind {
+; CHECK-LABEL: lock_and_sets_folded:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lock andq $-2, (%rdi)
+; CHECK-NEXT: sets %al
+; CHECK-NEXT: retq
+ %2 = atomicrmw and ptr %0, i64 -2 seq_cst, align 8
+ %3 = icmp slt i64 %2, 0
+ ret i1 %3
+}
+
+; icmp sgt (old & -2), -1 -> icmp sgt old, -1 (C has sign bit; AND preserves it)
+define i1 @lock_and_setns_folded(ptr %0) nounwind {
+; CHECK-LABEL: lock_and_setns_folded:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lock andq $-2, (%rdi)
+; CHECK-NEXT: setns %al
+; CHECK-NEXT: retq
+ %2 = atomicrmw and ptr %0, i64 -2 seq_cst, align 8
+ %3 = icmp sgt i64 %2, -1
+ ret i1 %3
+}
+
+; icmp slt (old ^ -2), 0 -> icmp sgt old, -1 (XOR with -2 flips sign bit)
+define i1 @lock_xor_sets_folded(ptr %0) nounwind {
+; CHECK-LABEL: lock_xor_sets_folded:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lock xorq $-2, (%rdi)
+; CHECK-NEXT: sets %al
+; CHECK-NEXT: retq
+ %2 = atomicrmw xor ptr %0, i64 -2 seq_cst, align 8
+ %3 = icmp sgt i64 %2, -1
+ ret i1 %3
+}
+
+; icmp sgt (old ^ -2), -1 -> icmp slt old, 0 (XOR with -2 flips sign bit)
+define i1 @lock_xor_setns_folded(ptr %0) nounwind {
+; CHECK-LABEL: lock_xor_setns_folded:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lock xorq $-2, (%rdi)
+; CHECK-NEXT: setns %al
+; CHECK-NEXT: retq
+ %2 = atomicrmw xor ptr %0, i64 -2 seq_cst, align 8
+ %3 = icmp slt i64 %2, 0
+ ret i1 %3
+}
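The next commit adds a guard the unsigned forms need: the ZF equivalences
only hold when -C is a power of two. A standalone counterexample in plain
C++ (illustrative only, not part of the patch) for C = -3:

  #include <cstdint>
  #include <cstdio>

  int main() {
    const int8_t C = -3;        // -C = 3 is not a power of two
    const int8_t Old = 1;
    bool FoldedICmp = (uint8_t)Old < (uint8_t)-C;  // icmp ult old, 3 -> true
    bool ZFAfterLockAnd = (int8_t)(Old & C) == 0;  // (1 & -3) = 1 -> false
    printf("icmp=%d zf=%d\n", FoldedICmp, ZFAfterLockAnd); // they disagree
    return 0;
  }

With Old = 1 the folded icmp is true but ZF after lock and would be clear,
so lowering to setcc on ZF would compute the wrong value; hence the guard.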
From c6b0a07caa17de78739286befc4f0180ad8d6712 Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Fri, 6 Mar 2026 10:36:13 -0500
Subject: [PATCH 3/3] add guard to make sure -C is a power of 2 for the and
optimization
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
.../X86/atomic-lock-setcc-instcombine.ll | 23 +++++++++++++++++++
2 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a1b9db55c3132..6a94fe1feaa32 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -32568,9 +32568,9 @@ static X86::CondCode getCmpArithCC(const AtomicRMWInst *AI) {
const APInt &C = CI->getValue();
const APInt *K;
if (match(I, m_c_ICmp(Pred, m_Specific(AI), m_APInt(K)))) {
- if (Pred == ICmpInst::ICMP_ULT && *K == -C)
+ if (Pred == ICmpInst::ICMP_ULT && *K == -C && (-C).isPowerOf2())
return X86::COND_E;
- if (Pred == ICmpInst::ICMP_UGT && *K == ~C)
+ if (Pred == ICmpInst::ICMP_UGT && *K == ~C && (-C).isPowerOf2())
return X86::COND_NE;
if (C.isNegative()) {
if (Pred == ICmpInst::ICMP_SLT && K->isZero())
diff --git a/llvm/test/CodeGen/X86/atomic-lock-setcc-instcombine.ll b/llvm/test/CodeGen/X86/atomic-lock-setcc-instcombine.ll
index 968d787999cbe..3efd64fd6de4d 100644
--- a/llvm/test/CodeGen/X86/atomic-lock-setcc-instcombine.ll
+++ b/llvm/test/CodeGen/X86/atomic-lock-setcc-instcombine.ll
@@ -77,3 +77,26 @@ define i1 @lock_xor_setns_folded(ptr %0) nounwind {
%3 = icmp slt i64 %2, 0
ret i1 %3
}
+
+; Negative test: C = -3, so -C = 3 which is not a power of two. The folded
+; icmp ult old, 3 does not satisfy the power-of-two guard and must not be
+; matched; a cmpxchg loop is emitted instead.
+define i1 @lock_and_not_pow2(ptr %0) nounwind {
+; CHECK-LABEL: lock_and_not_pow2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq (%rdi), %rax
+; CHECK-NEXT: .p2align 4
+; CHECK-NEXT: .LBB6_1: # %atomicrmw.start
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: movq %rax, %rcx
+; CHECK-NEXT: andq $-3, %rcx
+; CHECK-NEXT: lock cmpxchgq %rcx, (%rdi)
+; CHECK-NEXT: jne .LBB6_1
+; CHECK-NEXT: # %bb.2: # %atomicrmw.end
+; CHECK-NEXT: cmpq $3, %rax
+; CHECK-NEXT: setb %al
+; CHECK-NEXT: retq
+ %2 = atomicrmw and ptr %0, i64 -3 seq_cst, align 8
+ %3 = icmp ult i64 %2, 3
+ ret i1 %3
+}
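For the XOR sign-flip cases handled in the first commit, the same kind of
exhaustive i8 check passes (standalone sketch in plain C++, not part of the
patch), confirming that a negative C inverts SF and therefore the folded
predicates:

  #include <cassert>
  #include <cstdint>

  int main() {
    const int8_t C = -2; // sign bit set, so XOR flips old's sign bit
    for (int V = 0; V < 256; ++V) {
      int8_t Old = (int8_t)V;
      int8_t New = (int8_t)(Old ^ C); // what lock xor sets flags on
      assert((New < 0) == (Old > -1));  // SF set   <=> icmp sgt old, -1
      assert((New >= 0) == (Old < 0));  // SF clear <=> icmp slt old, 0
    }
    return 0;
  }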