[llvm] dba4188 - [X86] combineAdd - fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) (#142734)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jun 5 00:20:48 PDT 2025
Author: Simon Pilgrim
Date: 2025-06-05T08:20:44+01:00
New Revision: dba418816731bc1cc677519fdbb77caca812ddda
URL: https://github.com/llvm/llvm-project/commit/dba418816731bc1cc677519fdbb77caca812ddda
DIFF: https://github.com/llvm/llvm-project/commit/dba418816731bc1cc677519fdbb77caca812ddda.diff
LOG: [X86] combineAdd - fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y) (#142734)
Attempt to keep adds/shifts closer together for LEA matching
Fixes #55714
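For context: multiplication by 7 is typically lowered to (x << 3) - x, so a following add of %n produced add(sub(shl,...),...), which kept the address-mode matcher from folding the add and the shift into a single LEA (LEA scale factors are limited to 1/2/4/8, hence the c < 4 restriction). A minimal IR sketch of the affected pattern, mirroring the PR55714_i32 test updated below (the function name is illustrative):

define i32 @mul7_plus_n(i32 %n, i32 %q) {
  ; %q * 7 is lowered as (%q << 3) - %q; commuting the add ahead of the sub
  ; lets ISel emit leal (%rdi,%rsi,8), %eax followed by subl %esi, %eax.
  %mul = mul i32 %q, 7
  %add = add i32 %mul, %n
  ret i32 %add
}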
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
llvm/test/CodeGen/X86/apx/reloc-opt.ll
llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
llvm/test/CodeGen/X86/mul-constant-i64.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 6cc7c3d3afbc4..7aaf68cdb1794 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -58048,6 +58048,23 @@ static SDValue combineAdd(SDNode *N, SelectionDAG &DAG,
if (SDValue V = combineToHorizontalAddSub(N, DAG, Subtarget))
return V;
+ // Canonicalize hidden LEA pattern:
+ // Fold (add (sub (shl x, c), y), z) -> (sub (add (shl x, c), z), y)
+ // iff c < 4
+ if (VT == MVT::i32 || VT == MVT::i64) {
+ SDValue Y, Z, Shift;
+ APInt Amt;
+ if (sd_match(
+ N, m_Add(m_OneUse(m_Sub(m_AllOf(m_Value(Shift),
+ m_Shl(m_Value(), m_ConstInt(Amt))),
+ m_Value(Y))),
+ m_Value(Z))) &&
+ Amt.ult(4) && !isa<ConstantSDNode>(Z)) {
+ return DAG.getNode(ISD::SUB, DL, VT,
+ DAG.getNode(ISD::ADD, DL, VT, Shift, Z), Y);
+ }
+ }
+
// add(psadbw(X,0),psadbw(Y,0)) -> psadbw(add(X,Y),0)
// iff X and Y won't overflow.
if (Op0.getOpcode() == X86ISD::PSADBW && Op1.getOpcode() == X86ISD::PSADBW &&
diff --git a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
index 522b42e07c6e0..beea6d36fe874 100644
--- a/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
+++ b/llvm/test/CodeGen/X86/addr-mode-matcher-3.ll
@@ -104,16 +104,16 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: leal (,%ecx,8), %eax
-; X86-NEXT: subl %ecx, %eax
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: PR55714_i32:
; X64: # %bb.0:
; X64-NEXT: # kill: def $esi killed $esi def $rsi
-; X64-NEXT: leal (,%rsi,8), %eax
+; X64-NEXT: # kill: def $edi killed $edi def $rdi
+; X64-NEXT: leal (%rdi,%rsi,8), %eax
; X64-NEXT: subl %esi, %eax
-; X64-NEXT: addl %edi, %eax
; X64-NEXT: retq
%mul = mul i32 %q, 7
%add = add i32 %mul, %n
@@ -123,21 +123,19 @@ define i32 @PR55714_i32(i32 %n, i32 %q) {
define i64 @PR55714_i64(i64 %n, i64 %q) {
; X86-LABEL: PR55714_i64:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (,%eax,8), %ecx
-; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $7, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal (%edx,%ecx,8), %edx
+; X86-NEXT: subl %ecx, %edx
; X86-NEXT: addl {{[0-9]+}}(%esp), %eax
; X86-NEXT: adcl {{[0-9]+}}(%esp), %edx
; X86-NEXT: retl
;
; X64-LABEL: PR55714_i64:
; X64: # %bb.0:
-; X64-NEXT: leaq (,%rsi,8), %rax
+; X64-NEXT: leaq (%rdi,%rsi,8), %rax
; X64-NEXT: subq %rsi, %rax
-; X64-NEXT: addq %rdi, %rax
; X64-NEXT: retq
%mul = mul i64 %q, 7
%add = add i64 %mul, %n
diff --git a/llvm/test/CodeGen/X86/apx/reloc-opt.ll b/llvm/test/CodeGen/X86/apx/reloc-opt.ll
index a5ab94b00d64b..ecc3d3297ceab 100644
--- a/llvm/test/CodeGen/X86/apx/reloc-opt.ll
+++ b/llvm/test/CodeGen/X86/apx/reloc-opt.ll
@@ -13,11 +13,9 @@
; CHECK-LABEL: test_regclass_not_updated_by_regalloc_1
-; APXREL: movq (%rip), %r16
-; APXREL-NEXT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
-; NOAPXREL-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
-; NOAPXREL: movq (%rip), %rdi
-; NOAPXREL-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
+; CHECK-NOT: R_X86_64_CODE_4_GOTPCRELX gvar-0x4
+; CHECK: movq (%rip), %rdi
+; CHECK-NEXT: R_X86_64_REX_GOTPCRELX gvar-0x4
@gvar = external global [20000 x i8]
diff --git a/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll b/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
index 345014edd0e9d..5e94598565aa9 100644
--- a/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
+++ b/llvm/test/CodeGen/X86/buildvec-widen-dotproduct.ll
@@ -7,7 +7,6 @@
define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
; SSE2-LABEL: dot_ext_v8i8_v8i32:
; SSE2: # %bb.0: # %entry
-; SSE2-NEXT: pushq %r14
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: movzbl (%rdi), %eax
; SSE2-NEXT: movzbl (%rdi,%rsi), %ecx
@@ -18,9 +17,9 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
; SSE2-NEXT: leaq (%rsi,%rsi,4), %rbx
; SSE2-NEXT: movzbl (%rdi,%rbx), %ebx
; SSE2-NEXT: movzbl (%rdi,%r9,2), %r9d
-; SSE2-NEXT: leaq (,%rsi,8), %r14
-; SSE2-NEXT: subq %rsi, %r14
-; SSE2-NEXT: movzbl (%rdi,%r14), %esi
+; SSE2-NEXT: leaq (%rdi,%rsi,8), %rdi
+; SSE2-NEXT: subq %rsi, %rdi
+; SSE2-NEXT: movzbl (%rdi), %esi
; SSE2-NEXT: shll $16, %ecx
; SSE2-NEXT: orl %eax, %ecx
; SSE2-NEXT: movd %ecx, %xmm0
@@ -38,7 +37,6 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
; SSE2-NEXT: paddd %xmm0, %xmm1
; SSE2-NEXT: movd %xmm1, %eax
; SSE2-NEXT: popq %rbx
-; SSE2-NEXT: popq %r14
; SSE2-NEXT: retq
;
; SSE4-LABEL: dot_ext_v8i8_v8i32:
@@ -46,7 +44,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
; SSE4-NEXT: movzbl (%rdi), %eax
; SSE4-NEXT: leaq (%rsi,%rsi,4), %rcx
; SSE4-NEXT: leaq (%rsi,%rsi,2), %r8
-; SSE4-NEXT: leaq (,%rsi,8), %r9
+; SSE4-NEXT: leaq (%rdi,%rsi,8), %r9
; SSE4-NEXT: subq %rsi, %r9
; SSE4-NEXT: movd %eax, %xmm0
; SSE4-NEXT: pinsrb $2, (%rdi,%rsi), %xmm0
@@ -55,7 +53,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
; SSE4-NEXT: pinsrb $8, (%rdi,%rsi,4), %xmm0
; SSE4-NEXT: pinsrb $10, (%rdi,%rcx), %xmm0
; SSE4-NEXT: pinsrb $12, (%rdi,%r8,2), %xmm0
-; SSE4-NEXT: pinsrb $14, (%rdi,%r9), %xmm0
+; SSE4-NEXT: pinsrb $14, (%r9), %xmm0
; SSE4-NEXT: movdqu (%rdx), %xmm1
; SSE4-NEXT: pmaddwd %xmm0, %xmm1
; SSE4-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3]
@@ -70,7 +68,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
; AVX-NEXT: movzbl (%rdi), %eax
; AVX-NEXT: leaq (%rsi,%rsi,2), %rcx
; AVX-NEXT: leaq (%rsi,%rsi,4), %r8
-; AVX-NEXT: leaq (,%rsi,8), %r9
+; AVX-NEXT: leaq (%rdi,%rsi,8), %r9
; AVX-NEXT: subq %rsi, %r9
; AVX-NEXT: vmovd %eax, %xmm0
; AVX-NEXT: vpinsrb $2, (%rdi,%rsi), %xmm0, %xmm0
@@ -79,7 +77,7 @@ define i32 @dot_ext_v8i8_v8i32(ptr %a, i64 %a_stride, ptr %b) nounwind {
; AVX-NEXT: vpinsrb $8, (%rdi,%rsi,4), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $10, (%rdi,%r8), %xmm0, %xmm0
; AVX-NEXT: vpinsrb $12, (%rdi,%rcx,2), %xmm0, %xmm0
-; AVX-NEXT: vpinsrb $14, (%rdi,%r9), %xmm0, %xmm0
+; AVX-NEXT: vpinsrb $14, (%r9), %xmm0, %xmm0
; AVX-NEXT: vpmaddwd (%rdx), %xmm0, %xmm0
; AVX-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3]
; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
diff --git a/llvm/test/CodeGen/X86/mul-constant-i64.ll b/llvm/test/CodeGen/X86/mul-constant-i64.ll
index 03dd5351c78ac..40d591f8d1be8 100644
--- a/llvm/test/CodeGen/X86/mul-constant-i64.ll
+++ b/llvm/test/CodeGen/X86/mul-constant-i64.ll
@@ -166,12 +166,11 @@ define i64 @test_mul_by_6(i64 %x) {
define i64 @test_mul_by_7(i64 %x) {
; X86-LABEL: test_mul_by_7:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (,%eax,8), %ecx
-; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl $7, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal (%edx,%ecx,8), %edx
+; X86-NEXT: subl %ecx, %edx
; X86-NEXT: retl
;
; X86-NOOPT-LABEL: test_mul_by_7:
@@ -733,16 +732,17 @@ define i64 @test_mul_by_22(i64 %x) {
ret i64 %mul
}
-define i64 @test_mul_by_23(i64 %x) {
+define i64 @test_mul_by_23(i64 %x) nounwind {
; X86-LABEL: test_mul_by_23:
; X86: # %bb.0:
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: leal (%eax,%eax,2), %ecx
-; X86-NEXT: shll $3, %ecx
-; X86-NEXT: subl %eax, %ecx
+; X86-NEXT: pushl %esi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: leal (%ecx,%ecx,2), %esi
; X86-NEXT: movl $23, %eax
; X86-NEXT: mull {{[0-9]+}}(%esp)
-; X86-NEXT: addl %ecx, %edx
+; X86-NEXT: leal (%edx,%esi,8), %edx
+; X86-NEXT: subl %ecx, %edx
+; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X86-NOOPT-LABEL: test_mul_by_23: