[llvm] 1075a2f - [Instcombine] Write Instcombine pass to strength reduce lock xadd to lock sub (#184715)

via llvm-commits llvm-commits at lists.llvm.org
Sat Mar 21 13:45:35 PDT 2026


Author: Takashi Idobe
Date: 2026-03-21T20:45:29Z
New Revision: 1075a2fa7ed0a576e5aeb4af03b570eab1aaae04

URL: https://github.com/llvm/llvm-project/commit/1075a2fa7ed0a576e5aeb4af03b570eab1aaae04
DIFF: https://github.com/llvm/llvm-project/commit/1075a2fa7ed0a576e5aeb4af03b570eab1aaae04.diff

LOG: [Instcombine] Write Instcombine pass to strength reduce lock xadd to lock sub (#184715)

Resolves: https://github.com/llvm/llvm-project/issues/174933

The issue describes a case where fetch_sub(n) is properly optimized but
fetch_add(neg(n)) is not optimized to the same code.

Although the issue is tagged for x86, I assumed this would be best handled
outside of the backends, so I put this in InstCombine.

Added: 
    llvm/test/Transforms/InstCombine/atomicrmw-add-neg.ll

Modified: 
    llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
index a2e8c695331a6..4d6b0684abe4d 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineAtomicRMW.cpp
@@ -14,6 +14,7 @@
 #include "llvm/IR/Instructions.h"
 
 using namespace llvm;
+using namespace PatternMatch;
 
 /// Return true if and only if the given instruction does not modify the memory
 /// location referenced.  Note that an idempotent atomicrmw may still have
@@ -118,6 +119,23 @@ Instruction *InstCombinerImpl::visitAtomicRMWInst(AtomicRMWInst &RMWI) {
          RMWI.getOrdering() != AtomicOrdering::Unordered &&
          "AtomicRMWs don't make sense with Unordered or NotAtomic");
 
+  // Canonicalize atomicrmw add(ptr, neg(X)) -> atomicrmw sub(ptr, X)
+  //              atomicrmw sub(ptr, neg(X)) -> atomicrmw add(ptr, X)
+  // old + (-X) == old - X and old - (-X) == old + X; the returned old value
+  // is identical in both cases. We match strictly on `sub 0, X` (negation) to
+  // avoid infinite loops: a general negation of `sub A, B` yields `sub B, A`,
+  // which would infinitely be negated back on the next iteration.
+  auto Op = RMWI.getOperation();
+  if (Op == AtomicRMWInst::Add || Op == AtomicRMWInst::Sub) {
+    Value *Val = RMWI.getValOperand();
+    Value *X;
+    if (match(Val, m_Neg(m_Value(X)))) {
+      RMWI.setOperation(Op == AtomicRMWInst::Add ? AtomicRMWInst::Sub
+                                                 : AtomicRMWInst::Add);
+      return replaceOperand(RMWI, 1, X);
+    }
+  }
+
   if (!isIdempotentRMW(RMWI))
     return nullptr;
 

diff  --git a/llvm/test/Transforms/InstCombine/atomicrmw-add-neg.ll b/llvm/test/Transforms/InstCombine/atomicrmw-add-neg.ll
new file mode 100644
index 0000000000000..9269b0d2465ba
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/atomicrmw-add-neg.ll
@@ -0,0 +1,95 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
+; RUN: opt -passes=instcombine -S %s | FileCheck %s
+
+; Regression case: atomicrmw sub with variable is left unchanged
+define i64 @fn_sub(ptr %a, i64 %n) {
+; CHECK-LABEL: define i64 @fn_sub(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw sub ptr [[A]], i64 [[N]] monotonic, align 8
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %1 = atomicrmw sub ptr %a, i64 %n monotonic
+  ret i64 %1
+}
+
+; Canonicalize atomicrmw add(ptr, neg(n)) -> atomicrmw sub(ptr, n)
+define i64 @fn_add(ptr %a, i64 %n) {
+; CHECK-LABEL: define i64 @fn_add(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw sub ptr [[A]], i64 [[N]] monotonic, align 8
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %sub = sub i64 0, %n
+  %1 = atomicrmw add ptr %a, i64 %sub monotonic
+  ret i64 %1
+}
+
+; Canonicalize atomicrmw add(ptr, neg(n)) -> atomicrmw sub(ptr, n)
+; even when the negation is nsw
+define i64 @fn_adds(ptr %a, i64 %n) {
+; CHECK-LABEL: define i64 @fn_adds(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[OLD:%.*]] = atomicrmw sub ptr [[A]], i64 [[N]] monotonic, align 8
+; CHECK-NEXT:    ret i64 [[OLD]]
+;
+  %neg = sub nsw i64 0, %n
+  %old = atomicrmw add ptr %a, i64 %neg monotonic
+  ret i64 %old
+}
+
+; Canonicalize atomicrmw sub(ptr, neg(n)) -> atomicrmw add(ptr, n)
+define i64 @fn_sub_neg(ptr %a, i64 %n) {
+; CHECK-LABEL: define i64 @fn_sub_neg(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw add ptr [[A]], i64 [[N]] monotonic, align 8
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %neg = sub i64 0, %n
+  %1 = atomicrmw sub ptr %a, i64 %neg monotonic
+  ret i64 %1
+}
+
+; Don't canonicalize if the negated value has multiple uses -- as that would add an extra instruction.
+define i64 @fn_add_1_with_use(ptr %a, i64 %n) {
+; CHECK-LABEL: define i64 @fn_add_1_with_use(
+; CHECK-SAME: ptr [[A:%.*]], i64 [[N:%.*]]) {
+; CHECK-NEXT:    [[ADD:%.*]] = add i64 [[N]], 1
+; CHECK-NEXT:    call void @use(i64 [[ADD]])
+; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw add ptr [[A]], i64 [[ADD]] monotonic, align 8
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %add = add i64 %n, 1
+  call void @use(i64 %add)
+  %1 = atomicrmw add ptr %a, i64 %add monotonic
+  ret i64 %1
+}
+
+; Don't canonicalize atomicrmw sub(ptr, sub(a, b)) -> atomicrmw add(ptr, sub(b, a)),
+; as that would create an infinite loop by continuously swapping the sub operands.
+define i64 @fn_sub_of_two_vars(ptr %p, i64 %a, i64 %b) {
+; CHECK-LABEL: define i64 @fn_sub_of_two_vars(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[A]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw sub ptr [[P]], i64 [[SUB]] monotonic, align 8
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %sub = sub i64 %a, %b
+  %1 = atomicrmw sub ptr %p, i64 %sub monotonic
+  ret i64 %1
+}
+
+; Don't canonicalize atomicrmw add(ptr, sub(a, b)) -> atomicrmw sub(ptr, sub(b, a)),
+; as that would create an infinite loop by continuously swapping the sub operands.
+define i64 @fn_add_of_two_vars(ptr %p, i64 %a, i64 %b) {
+; CHECK-LABEL: define i64 @fn_add_of_two_vars(
+; CHECK-SAME: ptr [[P:%.*]], i64 [[A:%.*]], i64 [[B:%.*]]) {
+; CHECK-NEXT:    [[SUB:%.*]] = sub i64 [[A]], [[B]]
+; CHECK-NEXT:    [[TMP1:%.*]] = atomicrmw add ptr [[P]], i64 [[SUB]] monotonic, align 8
+; CHECK-NEXT:    ret i64 [[TMP1]]
+;
+  %sub = sub i64 %a, %b
+  %1 = atomicrmw add ptr %p, i64 %sub monotonic
+  ret i64 %1
+}
+
+declare void @use(i64)


        


More information about the llvm-commits mailing list