[llvm] [Instcombine] Write Instcombine pass to strength reduce lock xadd to lock sub (PR #184715)

Takashi Idobe via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 4 16:48:57 PST 2026


https://github.com/Takashiidobe created https://github.com/llvm/llvm-project/pull/184715

Resolves: https://github.com/llvm/llvm-project/issues/174933

The issue describes a case where fetch_sub(n) is properly optimized but fetch_add(-n) is not optimized to the same code.

Although the issue is tagged for x86 I assumed this would be useful for any backend so I put it in InstCombine. 

We'll start out with the example from the issue + one more test case to handle the signed case (extern c so there's no name mangling)

```cpp
#include <atomic>

extern "C" {
// Regression test since this is already optimized
bool fn_subu(std::atomic_uint64_t &a, std::uint64_t n) noexcept {
  return a.fetch_sub(n, std::memory_order_relaxed) == n;
}
bool fn_subs(std::atomic_int64_t &a, std::int64_t n) noexcept {
  return a.fetch_sub(n, std::memory_order_relaxed) == n;
}

// Currently unoptimized path
bool fn_addu_neg(std::atomic_uint64_t &a, std::uint64_t n) noexcept {
  return a.fetch_add(-n, std::memory_order_relaxed) == n;
}
bool fn_adds_neg(std::atomic_int64_t &a, std::int64_t n) noexcept {
  return a.fetch_add(-n, std::memory_order_relaxed) == n;
}
}
```

After compiling clang on main it emits this on -O2 confirming what the issue says, fetch_add(neg(n)) generates worse code even though it should be strength reduced like sub. 

```asm
fn_subu:
	lock	subq	%rsi, (%rdi)
	sete	%al
	retq
fn_subs: 
	lock	subq	%rsi, (%rdi)
	sete	%al
	retq
fn_addu_neg:
	movq	%rsi, %rax
	negq	%rax
	lock	xaddq	%rax, (%rdi)
	cmpq	%rsi, %rax
	sete	%al
	retq
fn_adds_neg:
	movq	%rsi, %rax
	negq	%rax
	lock	xaddq	%rax, (%rdi)
	cmpq	%rsi, %rax
	sete	%al
	retq
```

So to fix this up, we turn any atomicrmw add(ptr, neg(X)) into atomicrmw sub(ptr, X).

After compiling clang on this branch, I get the following, so all the cases, signed or not, are treated equally.

```asm
fn_subu:
	lock	subq	%rsi, (%rdi)
	sete	%al
	retq
fn_subs:
	lock	subq	%rsi, (%rdi)
	sete	%al
	retq
fn_addu_neg:
	lock	subq	%rsi, (%rdi)
	sete	%al
	retq
fn_adds_neg:
	lock	subq	%rsi, (%rdi)
	sete	%al
	retq
```

I don't see any change for aarch64 however, so it might've only been useful for x86?

>From 1b6356b8b354e02d6561a5f8e58ba09b6cb0cffa Mon Sep 17 00:00:00 2001
From: Takashiidobe <idobetakashi at gmail.com>
Date: Tue, 3 Mar 2026 22:34:19 -0500
Subject: [PATCH] write Instcombine to strength reduce lock xadd to lock sub

---
 .../InstCombine/InstCombineAtomicRMW.cpp      | 12 +++++
 .../InstCombine/atomicrmw-add-neg.ll          | 49 +++++++++++++++++++
 2 files changed, 61 insertions(+)
 create mode 100644 llvm/test/Transforms/InstCombine/atomicrmw-add-neg.ll

diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index a2e8c695331a6..a3ba3312138bd 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -14,6 +14,7 @@
 #include "llvm/IR/Instructions.h"
 
 using namespace llvm;
+using namespace llvm::PatternMatch;
 
 /// Return true if and only if the given instruction does not modify the memory
 /// location referenced.  Note that an idemptent atomicrmw may still have
@@ -118,6 +119,17 @@ Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
          RMWI.getOrdering() != AtomicOrdering::Unordered &&
          "AtomicRMWs don't make sense with Unordered or NotAtomic");
 
+  // Canonicalize atomicrmw add(ptr, neg(X)) -> atomicrmw sub(ptr, X).
+  // old + (-X) == old - X; the returned old value is identical.
+  // This allows strength reduction on targets where atomic sub is cheaper,
+  // e.g. lock sub instead of lock xadd on x86.
+  Value *X;
+  if (RMWI.getOperation() == AtomicRMWInst::Add &&
+      match(RMWI.getValOperand(), m_Neg(m_Value(X)))) {
+    RMWI.setOperation(AtomicRMWInst::Sub);
+    return replaceOperand(RMWI, 1, X);
+  }
+
   if (!isIdempotentRMW(RMWI))
     return nullptr;
 
diff --git a/llvm/test/Transforms/InstCombine/atomicrmw-add-neg.ll b/llvm/test/Transforms/InstCombine/atomicrmw-add-neg.ll
new file mode 100644
index 0000000000000..4c1bb20b6a9d4
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/atomicrmw-add-neg.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=instcombine -S %s | FileCheck %s
+
+; Regression case: atomicrmw sub is unchanged by this transform. 
+define i1 @fn_sub(ptr noundef nonnull align 8 captures(none) dereferenceable(8) %a, i64 noundef %n) {
+; CHECK-LABEL: define i1 @fn_sub(
+; CHECK-SAME: ptr noundef nonnull align 8 captures(none) dereferenceable(8) [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw sub ptr [[A]], i64 [[N]] monotonic, align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP0]], [[N]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %0 = atomicrmw sub ptr %a, i64 %n monotonic, align 8
+  %cmp = icmp eq i64 %0, %n
+  ret i1 %cmp
+}
+
+; Canonicalize atomicrmw add(ptr, neg(n)) -> atomicrmw sub(ptr, n)
+define i1 @fn_add(ptr noundef nonnull align 8 captures(none) dereferenceable(8) %a, i64 noundef %n) {
+; CHECK-LABEL: define i1 @fn_add(
+; CHECK-SAME: ptr noundef nonnull align 8 captures(none) dereferenceable(8) [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[TMP0:%.*]] = atomicrmw sub ptr [[A]], i64 [[N]] monotonic, align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[TMP0]], [[N]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %sub = sub i64 0, %n
+  %0 = atomicrmw add ptr %a, i64 %sub monotonic, align 8
+  %cmp = icmp eq i64 %0, %n
+  ret i1 %cmp
+}
+
+; nsw neg (signed -n) is also handled; the transform replaces the poison case on neg(INT_MIN) with wrapping sub
+define i1 @fn_adds(ptr noundef nonnull align 8 captures(none) dereferenceable(8) %a, i64 noundef %n) {
+; CHECK-LABEL: define i1 @fn_adds(
+; CHECK-SAME: ptr noundef nonnull align 8 captures(none) dereferenceable(8) [[A:%.*]], i64 noundef [[N:%.*]]) {
+; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-NEXT:    [[OLD:%.*]] = atomicrmw sub ptr [[A]], i64 [[N]] monotonic, align 8
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i64 [[OLD]], [[N]]
+; CHECK-NEXT:    ret i1 [[CMP]]
+;
+entry:
+  %neg = sub nsw i64 0, %n
+  %old = atomicrmw add ptr %a, i64 %neg monotonic, align 8
+  %cmp = icmp eq i64 %old, %n
+  ret i1 %cmp
+}



More information about the llvm-commits mailing list