[llvm] [X86] Fold generic ADD/SUB with constants to X86ISD::SUB/ADD (PR #164316)
via llvm-commits
llvm-commits@lists.llvm.org
Thu Oct 23 08:09:59 PDT 2025
https://github.com/brandonxin updated https://github.com/llvm/llvm-project/pull/164316
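The fold in this series rests on plain two's-complement identities between an X86ISD node that also produces EFLAGS and its generic counterpart: LHS - C == LHS + (-C), -(LHS + C) == (-C) - LHS, and -(C - RHS) == RHS + (-C). The standalone C++ sketch below is an illustration only (it is not part of the patch; uint32_t stands in for the DAG's i32 values) and checks all three identities under wrap-around arithmetic:

// Standalone sketch, not taken from the patch: checks the wrap-around
// identities the new combine relies on.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 42u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t X : Vals) {
    for (uint32_t C : Vals) {
      // X86ISD::SUB(X, C) can also serve a generic add(X, -C).
      assert(X - C == X + (0u - C));
      // Negating X86ISD::ADD(X, C) can serve a generic sub(-C, X).
      assert(0u - (X + C) == (0u - C) - X);
      // Negating X86ISD::SUB(C, X) can serve a generic add(X, -C).
      assert(0u - (C - X) == X + (0u - C));
    }
  }
  return 0;
}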
From 1a92b33f53d8a7d94cd9b1dca458606d283da10c Mon Sep 17 00:00:00 2001
From: Brandon <brandon.xin@outlook.com>
Date: Mon, 20 Oct 2025 14:14:19 -0500
Subject: [PATCH 1/5] [X86] Add new baseline tests for combineX86AddSub
---
llvm/test/CodeGen/X86/combine-adc.ll | 48 ++++++++++++++
llvm/test/CodeGen/X86/combine-sbb.ll | 95 ++++++++++++++++++++++++++++
2 files changed, 143 insertions(+)
diff --git a/llvm/test/CodeGen/X86/combine-adc.ll b/llvm/test/CodeGen/X86/combine-adc.ll
index 22417363f1093..0e46f2956a77e 100644
--- a/llvm/test/CodeGen/X86/combine-adc.ll
+++ b/llvm/test/CodeGen/X86/combine-adc.ll
@@ -89,4 +89,52 @@ define i32 @adc_merge_constants(i32 %a0) nounwind {
ret i32 %sum
}
+define i32 @adc_merge_sub(i32 %a0) nounwind {
+; X86-LABEL: adc_merge_sub:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: addl $42, %edi
+; X86-NEXT: setb %al
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll use@PLT
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: movl $-42, %eax
+; X86-NEXT: subl %esi, %eax
+; X86-NEXT: xorl %edi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; X64-LABEL: adc_merge_sub:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl %edi, %ebx
+; X64-NEXT: xorl %edi, %edi
+; X64-NEXT: movl %ebx, %ebp
+; X64-NEXT: addl $42, %ebp
+; X64-NEXT: setb %dil
+; X64-NEXT: callq use@PLT
+; X64-NEXT: movl $-42, %eax
+; X64-NEXT: subl %ebx, %eax
+; X64-NEXT: xorl %ebp, %eax
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+ %adc = tail call { i8, i32 } @llvm.x86.addcarry.32(i8 0, i32 %a0, i32 42)
+ %carry = extractvalue { i8, i32 } %adc, 0
+ call void @use(i8 %carry)
+ %sum = extractvalue { i8, i32 } %adc, 1
+ %sub = sub i32 -42, %a0
+ %result = xor i32 %sum, %sub
+ ret i32 %result
+}
+
declare { i8, i32 } @llvm.x86.addcarry.32(i8, i32, i32)
+declare void @use(i8)
diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
index 89aee965a2c1f..ee74d97c3b690 100644
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -333,4 +333,99 @@ define i32 @PR40483_sub6(ptr, i32) nounwind {
ret i32 %10
}
+define i32 @sbb_merge_add1(i32 %a0) nounwind {
+; X86-LABEL: sbb_merge_add1:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: movl %esi, %edi
+; X86-NEXT: subl $42, %edi
+; X86-NEXT: setb %al
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll use@PLT
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: addl $-42, %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; X64-LABEL: sbb_merge_add1:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl %edi, %ebx
+; X64-NEXT: xorl %edi, %edi
+; X64-NEXT: movl %ebx, %ebp
+; X64-NEXT: subl $42, %ebp
+; X64-NEXT: setb %dil
+; X64-NEXT: callq use@PLT
+; X64-NEXT: addl $-42, %ebx
+; X64-NEXT: xorl %ebp, %ebx
+; X64-NEXT: movl %ebx, %eax
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+ %sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %a0, i32 42)
+ %borrow = extractvalue { i8, i32 } %sbb, 0
+ call void @use(i8 %borrow)
+ %diff = extractvalue { i8, i32 } %sbb, 1
+ %add = add i32 %a0, -42
+ %result = xor i32 %diff, %add
+ ret i32 %result
+}
+
+define i32 @sbb_merge_add2(i32 %a0) nounwind {
+; X86-LABEL: sbb_merge_add2:
+; X86: # %bb.0:
+; X86-NEXT: pushl %edi
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl $42, %edi
+; X86-NEXT: xorl %eax, %eax
+; X86-NEXT: subl %esi, %edi
+; X86-NEXT: setb %al
+; X86-NEXT: pushl %eax
+; X86-NEXT: calll use@PLT
+; X86-NEXT: addl $4, %esp
+; X86-NEXT: addl $-42, %esi
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %esi, %eax
+; X86-NEXT: popl %esi
+; X86-NEXT: popl %edi
+; X86-NEXT: retl
+;
+; X64-LABEL: sbb_merge_add2:
+; X64: # %bb.0:
+; X64-NEXT: pushq %rbp
+; X64-NEXT: pushq %rbx
+; X64-NEXT: pushq %rax
+; X64-NEXT: movl %edi, %ebx
+; X64-NEXT: movl $42, %ebp
+; X64-NEXT: xorl %edi, %edi
+; X64-NEXT: subl %ebx, %ebp
+; X64-NEXT: setb %dil
+; X64-NEXT: callq use@PLT
+; X64-NEXT: addl $-42, %ebx
+; X64-NEXT: xorl %ebp, %ebx
+; X64-NEXT: movl %ebx, %eax
+; X64-NEXT: addq $8, %rsp
+; X64-NEXT: popq %rbx
+; X64-NEXT: popq %rbp
+; X64-NEXT: retq
+ %sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 42, i32 %a0)
+ %borrow = extractvalue { i8, i32 } %sbb, 0
+ call void @use(i8 %borrow)
+ %diff = extractvalue { i8, i32 } %sbb, 1
+ %add = add i32 %a0, -42
+ %result = xor i32 %diff, %add
+ ret i32 %result
+}
+
declare { i8, i32 } @llvm.x86.subborrow.32(i8, i32, i32)
+declare void @use(i8)
From b9f1d0f054a15c59af40adcfa5fad782e9b35d42 Mon Sep 17 00:00:00 2001
From: Brandon <brandon.xin@outlook.com>
Date: Mon, 20 Oct 2025 14:22:36 -0500
Subject: [PATCH 2/5] [X86] Fold generic ADD/SUB with constants to
X86ISD::SUB/ADD
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 25 +++-
llvm/test/CodeGen/X86/combine-adc.ll | 20 +--
llvm/test/CodeGen/X86/combine-sbb.ll | 48 +++----
.../CodeGen/X86/dag-update-nodetomatch.ll | 129 +++++++++---------
4 files changed, 114 insertions(+), 108 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index b5f8ee50cba3d..74a7d83aadfd9 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57616,10 +57616,10 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
}
// Fold any similar generic ADD/SUB opcodes to reuse this node.
- auto MatchGeneric = [&](SDValue N0, SDValue N1, bool Negate) {
+ auto MatchGeneric = [&](unsigned Opc, SDValue N0, SDValue N1, bool Negate) {
SDValue Ops[] = {N0, N1};
SDVTList VTs = DAG.getVTList(N->getValueType(0));
- if (SDNode *GenericAddSub = DAG.getNodeIfExists(GenericOpc, VTs, Ops)) {
+ if (SDNode *GenericAddSub = DAG.getNodeIfExists(Opc, VTs, Ops)) {
SDValue Op(N, 0);
if (Negate) {
// Bail if this is only used by a user of the x86 add/sub.
@@ -57631,8 +57631,25 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
DCI.CombineTo(GenericAddSub, Op);
}
};
- MatchGeneric(LHS, RHS, false);
- MatchGeneric(RHS, LHS, X86ISD::SUB == N->getOpcode());
+ MatchGeneric(GenericOpc, LHS, RHS, false);
+ MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode());
+
+ if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(RHS)) {
+ SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
+ if (X86ISD::SUB == N->getOpcode()) {
+ // With LHS - C, fold LHS + (-C)
+ MatchGeneric(ISD::ADD, LHS, NegC, false);
+ } else {
+ // With -(LHS + C), fold (-C) - LHS
+ MatchGeneric(ISD::SUB, NegC, LHS, true);
+ }
+ } else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) {
+ SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
+ if (X86ISD::SUB == N->getOpcode()) {
+ // With -(C - RHS), fold RHS + (-C)
+ MatchGeneric(ISD::ADD, RHS, NegC, true);
+ }
+ }
// TODO: Can we drop the ZeroSecondOpOnly limit? This is to guarantee that the
// EFLAGS result doesn't change.
diff --git a/llvm/test/CodeGen/X86/combine-adc.ll b/llvm/test/CodeGen/X86/combine-adc.ll
index 0e46f2956a77e..a2aaea31aa6ff 100644
--- a/llvm/test/CodeGen/X86/combine-adc.ll
+++ b/llvm/test/CodeGen/X86/combine-adc.ll
@@ -94,17 +94,17 @@ define i32 @adc_merge_sub(i32 %a0) nounwind {
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: movl %esi, %edi
; X86-NEXT: addl $42, %edi
; X86-NEXT: setb %al
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: negl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
-; X86-NEXT: movl $-42, %eax
-; X86-NEXT: subl %esi, %eax
-; X86-NEXT: xorl %edi, %eax
+; X86-NEXT: xorl %edi, %esi
+; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: retl
@@ -116,13 +116,13 @@ define i32 @adc_merge_sub(i32 %a0) nounwind {
; X64-NEXT: pushq %rax
; X64-NEXT: movl %edi, %ebx
; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: movl %ebx, %ebp
-; X64-NEXT: addl $42, %ebp
+; X64-NEXT: addl $42, %ebx
; X64-NEXT: setb %dil
+; X64-NEXT: movl %ebx, %ebp
+; X64-NEXT: negl %ebp
; X64-NEXT: callq use@PLT
-; X64-NEXT: movl $-42, %eax
-; X64-NEXT: subl %ebx, %eax
-; X64-NEXT: xorl %ebp, %eax
+; X64-NEXT: xorl %ebx, %ebp
+; X64-NEXT: movl %ebp, %eax
; X64-NEXT: addq $8, %rsp
; X64-NEXT: popq %rbx
; X64-NEXT: popq %rbp
diff --git a/llvm/test/CodeGen/X86/combine-sbb.ll b/llvm/test/CodeGen/X86/combine-sbb.ll
index ee74d97c3b690..62744d4f3050a 100644
--- a/llvm/test/CodeGen/X86/combine-sbb.ll
+++ b/llvm/test/CodeGen/X86/combine-sbb.ll
@@ -336,40 +336,25 @@ define i32 @PR40483_sub6(ptr, i32) nounwind {
define i32 @sbb_merge_add1(i32 %a0) nounwind {
; X86-LABEL: sbb_merge_add1:
; X86: # %bb.0:
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: movl %esi, %edi
-; X86-NEXT: subl $42, %edi
+; X86-NEXT: cmpl $42, {{[0-9]+}}(%esp)
; X86-NEXT: setb %al
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
-; X86-NEXT: addl $-42, %esi
-; X86-NEXT: xorl %edi, %esi
-; X86-NEXT: movl %esi, %eax
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
+; X86-NEXT: xorl %eax, %eax
; X86-NEXT: retl
;
; X64-LABEL: sbb_merge_add1:
; X64: # %bb.0:
-; X64-NEXT: pushq %rbp
-; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
-; X64-NEXT: movl %edi, %ebx
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: movl %ebx, %ebp
-; X64-NEXT: subl $42, %ebp
-; X64-NEXT: setb %dil
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: cmpl $42, %edi
+; X64-NEXT: setb %al
+; X64-NEXT: movl %eax, %edi
; X64-NEXT: callq use@PLT
-; X64-NEXT: addl $-42, %ebx
-; X64-NEXT: xorl %ebp, %ebx
-; X64-NEXT: movl %ebx, %eax
-; X64-NEXT: addq $8, %rsp
-; X64-NEXT: popq %rbx
-; X64-NEXT: popq %rbp
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: popq %rcx
; X64-NEXT: retq
%sbb = tail call { i8, i32 } @llvm.x86.subborrow.32(i8 0, i32 %a0, i32 42)
%borrow = extractvalue { i8, i32 } %sbb, 0
@@ -385,15 +370,15 @@ define i32 @sbb_merge_add2(i32 %a0) nounwind {
; X86: # %bb.0:
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl $42, %edi
; X86-NEXT: xorl %eax, %eax
-; X86-NEXT: subl %esi, %edi
+; X86-NEXT: subl {{[0-9]+}}(%esp), %edi
; X86-NEXT: setb %al
+; X86-NEXT: movl %edi, %esi
+; X86-NEXT: negl %esi
; X86-NEXT: pushl %eax
; X86-NEXT: calll use@PLT
; X86-NEXT: addl $4, %esp
-; X86-NEXT: addl $-42, %esi
; X86-NEXT: xorl %edi, %esi
; X86-NEXT: movl %esi, %eax
; X86-NEXT: popl %esi
@@ -405,13 +390,14 @@ define i32 @sbb_merge_add2(i32 %a0) nounwind {
; X64-NEXT: pushq %rbp
; X64-NEXT: pushq %rbx
; X64-NEXT: pushq %rax
-; X64-NEXT: movl %edi, %ebx
; X64-NEXT: movl $42, %ebp
-; X64-NEXT: xorl %edi, %edi
-; X64-NEXT: subl %ebx, %ebp
-; X64-NEXT: setb %dil
+; X64-NEXT: xorl %eax, %eax
+; X64-NEXT: subl %edi, %ebp
+; X64-NEXT: setb %al
+; X64-NEXT: movl %ebp, %ebx
+; X64-NEXT: negl %ebx
+; X64-NEXT: movl %eax, %edi
; X64-NEXT: callq use@PLT
-; X64-NEXT: addl $-42, %ebx
; X64-NEXT: xorl %ebp, %ebx
; X64-NEXT: movl %ebx, %eax
; X64-NEXT: addq $8, %rsp
diff --git a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
index b428ce457ff40..71ad598abe683 100644
--- a/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
+++ b/llvm/test/CodeGen/X86/dag-update-nodetomatch.ll
@@ -96,6 +96,17 @@ entry:
define void @_Z2x6v() local_unnamed_addr {
; CHECK-LABEL: _Z2x6v:
; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
+; CHECK-NEXT: movl (%rax), %edx
+; CHECK-NEXT: andl $511, %edx # imm = 0x1FF
+; CHECK-NEXT: leaq 1(%rdx), %rax
+; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movl %eax, (%rcx)
+; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
+; CHECK-NEXT: movl (%rcx), %ecx
+; CHECK-NEXT: testl %ecx, %ecx
+; CHECK-NEXT: je .LBB1_18
+; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: pushq %r15
@@ -114,58 +125,47 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: .cfi_offset %r14, -32
; CHECK-NEXT: .cfi_offset %r15, -24
; CHECK-NEXT: .cfi_offset %rbp, -16
-; CHECK-NEXT: movq x1@GOTPCREL(%rip), %rax
-; CHECK-NEXT: movl (%rax), %ebx
-; CHECK-NEXT: andl $511, %ebx # imm = 0x1FF
-; CHECK-NEXT: leaq 1(%rbx), %rax
-; CHECK-NEXT: movq x4@GOTPCREL(%rip), %rcx
-; CHECK-NEXT: movl %eax, (%rcx)
-; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rcx
-; CHECK-NEXT: movl (%rcx), %ecx
-; CHECK-NEXT: testl %ecx, %ecx
-; CHECK-NEXT: je .LBB1_18
-; CHECK-NEXT: # %bb.1: # %for.cond1thread-pre-split.lr.ph
-; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rdx
-; CHECK-NEXT: movq (%rdx), %rsi
-; CHECK-NEXT: movl %ecx, %edx
-; CHECK-NEXT: notl %edx
-; CHECK-NEXT: leaq 8(,%rdx,8), %rdi
+; CHECK-NEXT: movq x5@GOTPCREL(%rip), %rsi
+; CHECK-NEXT: movq (%rsi), %rsi
+; CHECK-NEXT: movl %ecx, %edi
+; CHECK-NEXT: notl %edi
+; CHECK-NEXT: leaq 8(,%rdi,8), %rdi
; CHECK-NEXT: imulq %rax, %rdi
; CHECK-NEXT: addq %rsi, %rdi
; CHECK-NEXT: movq x2@GOTPCREL(%rip), %r8
-; CHECK-NEXT: movl (%r8), %edx
-; CHECK-NEXT: leal 8(,%rbx,8), %eax
+; CHECK-NEXT: movl (%r8), %r9d
+; CHECK-NEXT: leal 8(,%rdx,8), %eax
; CHECK-NEXT: movq %rax, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
-; CHECK-NEXT: leaq 32(%rsi), %r11
-; CHECK-NEXT: leaq 8(,%rbx,8), %rbx
-; CHECK-NEXT: xorl %r14d, %r14d
-; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r15
-; CHECK-NEXT: movq %rsi, %r12
+; CHECK-NEXT: leaq 32(%rsi), %rbx
+; CHECK-NEXT: leaq 8(,%rdx,8), %r14
+; CHECK-NEXT: xorl %r15d, %r15d
+; CHECK-NEXT: movq x0@GOTPCREL(%rip), %r12
+; CHECK-NEXT: movq %rsi, %r13
; CHECK-NEXT: jmp .LBB1_2
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_15: # %for.cond1.for.inc3_crit_edge
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movl %edx, (%r8)
+; CHECK-NEXT: movl %r9d, (%r8)
; CHECK-NEXT: .LBB1_16: # %for.inc3
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq %rbx, %r12
-; CHECK-NEXT: incq %r14
-; CHECK-NEXT: addq %rbx, %r11
+; CHECK-NEXT: addq %r14, %r13
+; CHECK-NEXT: incq %r15
+; CHECK-NEXT: addq %r14, %rbx
; CHECK-NEXT: incl %ecx
; CHECK-NEXT: je .LBB1_17
; CHECK-NEXT: .LBB1_2: # %for.cond1thread-pre-split
; CHECK-NEXT: # =>This Loop Header: Depth=1
; CHECK-NEXT: # Child Loop BB1_12 Depth 2
; CHECK-NEXT: # Child Loop BB1_14 Depth 2
-; CHECK-NEXT: testl %edx, %edx
+; CHECK-NEXT: testl %r9d, %r9d
; CHECK-NEXT: jns .LBB1_16
; CHECK-NEXT: # %bb.3: # %for.body2.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: movslq %edx, %r13
-; CHECK-NEXT: testq %r13, %r13
+; CHECK-NEXT: movslq %r9d, %r9
+; CHECK-NEXT: testq %r9, %r9
; CHECK-NEXT: movq $-1, %rbp
-; CHECK-NEXT: cmovnsq %r13, %rbp
-; CHECK-NEXT: subq %r13, %rbp
+; CHECK-NEXT: cmovnsq %r9, %rbp
+; CHECK-NEXT: subq %r9, %rbp
; CHECK-NEXT: incq %rbp
; CHECK-NEXT: cmpq $4, %rbp
; CHECK-NEXT: jb .LBB1_14
@@ -177,20 +177,20 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: # %bb.5: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
-; CHECK-NEXT: imulq %r14, %rax
-; CHECK-NEXT: leaq (%rsi,%rax), %r10
-; CHECK-NEXT: leaq (%r10,%r13,8), %r9
-; CHECK-NEXT: testq %r13, %r13
-; CHECK-NEXT: movq $-1, %r10
-; CHECK-NEXT: cmovnsq %r13, %r10
-; CHECK-NEXT: cmpq %r15, %r9
+; CHECK-NEXT: imulq %r15, %rax
+; CHECK-NEXT: leaq (%rsi,%rax), %r11
+; CHECK-NEXT: leaq (%r11,%r9,8), %r10
+; CHECK-NEXT: testq %r9, %r9
+; CHECK-NEXT: movq $-1, %r11
+; CHECK-NEXT: cmovnsq %r9, %r11
+; CHECK-NEXT: cmpq %r12, %r10
; CHECK-NEXT: jae .LBB1_7
; CHECK-NEXT: # %bb.6: # %vector.memcheck
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: leaq 8(%rsi), %r9
-; CHECK-NEXT: addq %r9, %rax
-; CHECK-NEXT: leaq (%rax,%r10,8), %rax
-; CHECK-NEXT: cmpq %r15, %rax
+; CHECK-NEXT: leaq 8(%rsi), %r10
+; CHECK-NEXT: addq %r10, %rax
+; CHECK-NEXT: leaq (%rax,%r11,8), %rax
+; CHECK-NEXT: cmpq %r12, %rax
; CHECK-NEXT: ja .LBB1_14
; CHECK-NEXT: .LBB1_7: # %vector.body.preheader
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
@@ -201,50 +201,47 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: movdqu %xmm0, (%r12,%r13,8)
-; CHECK-NEXT: movdqu %xmm0, 16(%r12,%r13,8)
-; CHECK-NEXT: movl $4, %r10d
+; CHECK-NEXT: movdqu %xmm0, (%r13,%r9,8)
+; CHECK-NEXT: movdqu %xmm0, 16(%r13,%r9,8)
+; CHECK-NEXT: movl $4, %r11d
; CHECK-NEXT: shrq $2, %rax
; CHECK-NEXT: jne .LBB1_11
; CHECK-NEXT: jmp .LBB1_13
; CHECK-NEXT: .LBB1_8: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: xorl %r10d, %r10d
+; CHECK-NEXT: xorl %r11d, %r11d
; CHECK-NEXT: shrq $2, %rax
; CHECK-NEXT: je .LBB1_13
; CHECK-NEXT: .LBB1_11: # %vector.body.preheader.new
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
; CHECK-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; CHECK-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
-; CHECK-NEXT: movq %r10, %rax
+; CHECK-NEXT: movq %r11, %rax
; CHECK-NEXT: subq %rdx, %rax
-; CHECK-NEXT: addq %r13, %r10
-; CHECK-NEXT: leaq (%r11,%r10,8), %r10
+; CHECK-NEXT: addq %r9, %r11
+; CHECK-NEXT: leaq (%rbx,%r11,8), %r11
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_12: # %vector.body
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movdqu %xmm0, -32(%r10)
-; CHECK-NEXT: movdqu %xmm0, -16(%r10)
-; CHECK-NEXT: movdqu %xmm0, (%r10)
-; CHECK-NEXT: movdqu %xmm0, 16(%r10)
-; CHECK-NEXT: addq $64, %r10
+; CHECK-NEXT: movdqu %xmm0, -32(%r11)
+; CHECK-NEXT: movdqu %xmm0, -16(%r11)
+; CHECK-NEXT: movdqu %xmm0, (%r11)
+; CHECK-NEXT: movdqu %xmm0, 16(%r11)
+; CHECK-NEXT: addq $64, %r11
; CHECK-NEXT: addq $8, %rax
; CHECK-NEXT: jne .LBB1_12
; CHECK-NEXT: .LBB1_13: # %middle.block
; CHECK-NEXT: # in Loop: Header=BB1_2 Depth=1
-; CHECK-NEXT: addq %rdx, %r13
+; CHECK-NEXT: addq %rdx, %r9
; CHECK-NEXT: cmpq %rdx, %rbp
-; CHECK-NEXT: movq %r13, %rdx
; CHECK-NEXT: je .LBB1_15
; CHECK-NEXT: .p2align 4
; CHECK-NEXT: .LBB1_14: # %for.body2
; CHECK-NEXT: # Parent Loop BB1_2 Depth=1
; CHECK-NEXT: # => This Inner Loop Header: Depth=2
-; CHECK-NEXT: movq (%r15), %rax
-; CHECK-NEXT: movq %rax, (%r12,%r13,8)
-; CHECK-NEXT: leaq 1(%r13), %rdx
-; CHECK-NEXT: cmpq $-1, %r13
-; CHECK-NEXT: movq %rdx, %r13
+; CHECK-NEXT: movq (%r12), %rax
+; CHECK-NEXT: movq %rax, (%r13,%r9,8)
+; CHECK-NEXT: incq %r9
; CHECK-NEXT: jl .LBB1_14
; CHECK-NEXT: jmp .LBB1_15
; CHECK-NEXT: .LBB1_17: # %for.cond.for.end5_crit_edge
@@ -252,7 +249,6 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: movq %rdi, (%rax)
; CHECK-NEXT: movq x3@GOTPCREL(%rip), %rax
; CHECK-NEXT: movl $0, (%rax)
-; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: popq %r12
@@ -265,6 +261,13 @@ define void @_Z2x6v() local_unnamed_addr {
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 8
+; CHECK-NEXT: .cfi_restore %rbx
+; CHECK-NEXT: .cfi_restore %r12
+; CHECK-NEXT: .cfi_restore %r13
+; CHECK-NEXT: .cfi_restore %r14
+; CHECK-NEXT: .cfi_restore %r15
+; CHECK-NEXT: .cfi_restore %rbp
+; CHECK-NEXT: .LBB1_18: # %for.end5
; CHECK-NEXT: retq
entry:
%0 = load i32, ptr @x1, align 4
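The adc_merge_sub diff above shows the negate path concretely: the generic sub i32 -42, %a0 no longer needs its own movl $-42/subl pair, because -42 - %a0 equals the negation of the X86ISD::ADD(%a0, 42) value, so the combine reuses that result and the checked output only gains a single negl. A standalone check of that equivalence (an illustration under the same uint32_t-for-i32 assumption as above, not code from the patch):

// Illustration only: the generic sub(-42, a0) equals the negated value of
// the x86 add(a0, 42), which is what the new `negl` in the test computes.
#include <cassert>
#include <cstdint>

int main() {
  const uint32_t Vals[] = {0u, 1u, 41u, 42u, 0x7fffffffu, 0x80000000u, 0xffffffffu};
  for (uint32_t A0 : Vals) {
    uint32_t AddVal = A0 + 42u;           // value result of the x86 add
    uint32_t Negated = 0u - AddVal;       // the extra negl on that result
    uint32_t GenericSub = 0u - 42u - A0;  // the original sub i32 -42, %a0
    assert(Negated == GenericSub);
  }
  return 0;
}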
From 576b93ebfc595871afd19348081bb4063e8dc8b7 Mon Sep 17 00:00:00 2001
From: Brandon <brandon.xin@outlook.com>
Date: Wed, 22 Oct 2025 21:48:32 -0500
Subject: [PATCH 3/5] [X86] Fix misleading comments in combineX86AddSub
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 74a7d83aadfd9..84dbaec297eda 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57637,16 +57637,16 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(RHS)) {
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
if (X86ISD::SUB == N->getOpcode()) {
- // With LHS - C, fold LHS + (-C)
+ // Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C).
MatchGeneric(ISD::ADD, LHS, NegC, false);
} else {
- // With -(LHS + C), fold (-C) - LHS
+ // Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS).
MatchGeneric(ISD::SUB, NegC, LHS, true);
}
} else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) {
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
if (X86ISD::SUB == N->getOpcode()) {
- // With -(C - RHS), fold RHS + (-C)
+ // Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).
MatchGeneric(ISD::ADD, RHS, NegC, true);
}
}
From 4662b9e556bbe812d9a42f1da4cb5c9588ff19e2 Mon Sep 17 00:00:00 2001
From: Brandon <brandon.xin@outlook.com>
Date: Thu, 23 Oct 2025 10:01:56 -0500
Subject: [PATCH 4/5] [X86] Move constant construction inside the `if` block to
avoid unnecessary DAG node creation
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 84dbaec297eda..bf904fbf2620c 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57644,8 +57644,8 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
MatchGeneric(ISD::SUB, NegC, LHS, true);
}
} else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) {
- SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
if (X86ISD::SUB == N->getOpcode()) {
+ SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
// Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).
MatchGeneric(ISD::ADD, RHS, NegC, true);
}
From 64521e9f7f31a2f4160902e644131ab26f551b66 Mon Sep 17 00:00:00 2001
From: Brandon <brandon.xin@outlook.com>
Date: Thu, 23 Oct 2025 10:09:41 -0500
Subject: [PATCH 5/5] [X86] Use `auto` with `dyn_cast` for style consistency
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index bf904fbf2620c..6eceb3a728bfe 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -57634,7 +57634,7 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
MatchGeneric(GenericOpc, LHS, RHS, false);
MatchGeneric(GenericOpc, RHS, LHS, X86ISD::SUB == N->getOpcode());
- if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(RHS)) {
+ if (auto *Const = dyn_cast<ConstantSDNode>(RHS)) {
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
if (X86ISD::SUB == N->getOpcode()) {
// Fold generic add(LHS, -C) to X86ISD::SUB(LHS, C).
@@ -57643,7 +57643,7 @@ static SDValue combineX86AddSub(SDNode *N, SelectionDAG &DAG,
// Negate X86ISD::ADD(LHS, C) and replace generic sub(-C, LHS).
MatchGeneric(ISD::SUB, NegC, LHS, true);
}
- } else if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(LHS)) {
+ } else if (auto *Const = dyn_cast<ConstantSDNode>(LHS)) {
if (X86ISD::SUB == N->getOpcode()) {
SDValue NegC = DAG.getConstant(-Const->getAPIntValue(), DL, VT);
// Negate X86ISD::SUB(C, RHS) and replace generic add(RHS, -C).