[llvm] [DAG] Recognise AVGFLOOR (((A >> 1) + (B >> 1)) + (A & B & 1)) patterns (PR #169644)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 26 04:14:59 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: Simon Pilgrim (RKSimon)
Recognise 'LSB' style AVGFLOOR patterns.
I attempted to use the m_Reassociatable* pattern matchers, but hit an issue: we can't reliably match m_Value/m_Deferred pairs within the same reassociation, as there appear to be no guarantees on the order in which operands are matched. I'll raise a bug for this; in the meantime the test_lsb_i32 tests keep the pattern to show the missed matching opportunity.
Fixes #53648
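For reference, a minimal standalone sketch (not part of the patch) that exhaustively verifies the identity the combine relies on: the new LSB form and the existing XOR form both compute floor((a + b) / 2) for every 8-bit input pair, unsigned and signed.

```cpp
// Standalone sanity check (not part of the patch): exhaustively verify
// that the existing XOR form and the new LSB form both compute
// floor((a + b) / 2) for all 8-bit pairs, unsigned and signed.
// Assumes arithmetic right shift for signed int (guaranteed since
// C++20 and universal in practice).
#include <cassert>
#include <cstdint>

int main() {
  for (unsigned I = 0; I != 256; ++I) {
    for (unsigned J = 0; J != 256; ++J) {
      uint8_t A = I, B = J;
      uint8_t URef = (unsigned(A) + unsigned(B)) / 2;   // widened reference
      uint8_t UXor = (A & B) + ((A ^ B) >> 1);          // existing pattern
      uint8_t ULsb = (A >> 1) + (B >> 1) + (A & B & 1); // new pattern
      assert(UXor == URef && ULsb == URef);

      int8_t SA = int8_t(I), SB = int8_t(J);
      int8_t SRef = (int(SA) + int(SB)) >> 1; // widened floor division
      int8_t SXor = (SA & SB) + ((SA ^ SB) >> 1);
      int8_t SLsb = (SA >> 1) + (SB >> 1) + (SA & SB & 1);
      assert(SXor == SRef && SLsb == SRef);
    }
  }
  return 0;
}
```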
---
Full diff: https://github.com/llvm/llvm-project/pull/169644.diff
3 Files Affected:
- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+16-4)
- (modified) llvm/test/CodeGen/X86/avgfloors-scalar.ll (+27-47)
- (modified) llvm/test/CodeGen/X86/avgflooru-scalar.ll (+22-54)
``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..813cbeafeaec9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3154,19 +3154,31 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
}
// Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
+// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
SDValue N0 = N->getOperand(0);
EVT VT = N0.getValueType();
SDValue A, B;
+ // FIXME: m_ReassociatableAdd can't handle m_Value/m_Deferred mixing.
if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
- sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
- m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
+ (sd_match(N,
+ m_Add(m_And(m_Value(A), m_Value(B)),
+ m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
+ sd_match(N, m_Add(m_Add(m_Srl(m_Value(A), m_One()),
+ m_Srl(m_Value(B), m_One())),
+ m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+ m_One()))))) {
return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
}
if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
- sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
- m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
+ (sd_match(N,
+ m_Add(m_And(m_Value(A), m_Value(B)),
+ m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
+ sd_match(N, m_Add(m_Add(m_Sra(m_Value(A), m_One()),
+ m_Sra(m_Value(B), m_One())),
+ m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+ m_One()))))) {
return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
}
diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
index fd303192e6c50..c8bbc875834d1 100644
--- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
@@ -38,26 +38,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
; X86-LABEL: test_lsb_i8:
; X86: # %bb.0:
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarb %dl
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: sarb %cl
-; X86-NEXT: addb %dl, %cl
-; X86-NEXT: andb $1, %al
-; X86-NEXT: addb %cl, %al
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_lsb_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: sarb %al
-; X64-NEXT: andb %sil, %dil
-; X64-NEXT: sarb %sil
-; X64-NEXT: addb %sil, %al
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: addb %dil, %al
+; X64-NEXT: movsbl %sil, %ecx
+; X64-NEXT: movsbl %dil, %eax
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%s0 = ashr i8 %a0, 1
%s1 = ashr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
; X86: # %bb.0:
; X86-NEXT: movswl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: sarl %edx
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: sarl %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: andl $1, %eax
; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_lsb_i16:
; X64: # %bb.0:
-; X64-NEXT: movswl %si, %eax
-; X64-NEXT: movswl %di, %ecx
-; X64-NEXT: sarl %ecx
-; X64-NEXT: sarl %eax
+; X64-NEXT: movswl %si, %ecx
+; X64-NEXT: movswl %di, %eax
; X64-NEXT: addl %ecx, %eax
-; X64-NEXT: andl %esi, %edi
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%s0 = ashr i16 %a0, 1
@@ -316,21 +301,19 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
; X86-NEXT: pushl %edi
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: sarl %ebx
-; X86-NEXT: shldl $31, %eax, %edi
+; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: movl %eax, %ebx
+; X86-NEXT: xorl %esi, %ebx
; X86-NEXT: movl %ecx, %edx
+; X86-NEXT: xorl %edi, %edx
+; X86-NEXT: shrdl $1, %edx, %ebx
+; X86-NEXT: andl %edi, %ecx
; X86-NEXT: sarl %edx
-; X86-NEXT: shldl $31, %esi, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: adcl %ebx, %edx
; X86-NEXT: andl %esi, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: adcl $0, %edx
+; X86-NEXT: addl %ebx, %eax
+; X86-NEXT: adcl %ecx, %edx
; X86-NEXT: popl %esi
; X86-NEXT: popl %edi
; X86-NEXT: popl %ebx
@@ -338,14 +321,11 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
;
; X64-LABEL: test_lsb_i64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: sarq %rcx
-; X64-NEXT: andl %esi, %edi
; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: sarq %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: xorq %rdi, %rsi
+; X64-NEXT: sarq %rsi
+; X64-NEXT: addq %rsi, %rax
; X64-NEXT: retq
%s0 = ashr i64 %a0, 1
%s1 = ashr i64 %a1, 1
diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
index 9ae4492bb4cd4..7ad10164ad484 100644
--- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
@@ -40,24 +40,18 @@ define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
; X86: # %bb.0:
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrb %dl
-; X86-NEXT: andb %cl, %al
-; X86-NEXT: shrb %cl
-; X86-NEXT: addb %dl, %cl
-; X86-NEXT: andb $1, %al
-; X86-NEXT: addb %cl, %al
+; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
+; X86-NEXT: # kill: def $al killed $al killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_lsb_i8:
; X64: # %bb.0:
-; X64-NEXT: movl %edi, %eax
-; X64-NEXT: shrb %al
-; X64-NEXT: andb %sil, %dil
-; X64-NEXT: shrb %sil
-; X64-NEXT: addb %sil, %al
-; X64-NEXT: andb $1, %dil
-; X64-NEXT: addb %dil, %al
+; X64-NEXT: movzbl %sil, %ecx
+; X64-NEXT: movzbl %dil, %eax
+; X64-NEXT: addl %ecx, %eax
+; X64-NEXT: shrl %eax
+; X64-NEXT: # kill: def $al killed $al killed $eax
; X64-NEXT: retq
%s0 = lshr i8 %a0, 1
%s1 = lshr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
; X86: # %bb.0:
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl %eax, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: andl %ecx, %eax
-; X86-NEXT: shrl %ecx
-; X86-NEXT: addl %edx, %ecx
-; X86-NEXT: andl $1, %eax
; X86-NEXT: addl %ecx, %eax
+; X86-NEXT: shrl %eax
; X86-NEXT: # kill: def $ax killed $ax killed $eax
; X86-NEXT: retl
;
; X64-LABEL: test_lsb_i16:
; X64: # %bb.0:
-; X64-NEXT: movzwl %si, %eax
-; X64-NEXT: movzwl %di, %ecx
-; X64-NEXT: shrl %ecx
-; X64-NEXT: shrl %eax
+; X64-NEXT: movzwl %si, %ecx
+; X64-NEXT: movzwl %di, %eax
; X64-NEXT: addl %ecx, %eax
-; X64-NEXT: andl %esi, %edi
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addl %edi, %eax
+; X64-NEXT: shrl %eax
; X64-NEXT: # kill: def $ax killed $ax killed $eax
; X64-NEXT: retq
%s0 = lshr i16 %a0, 1
@@ -300,40 +285,23 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
; X86-LABEL: test_lsb_i64:
; X86: # %bb.0:
-; X86-NEXT: pushl %ebx
-; X86-NEXT: pushl %edi
-; X86-NEXT: pushl %esi
-; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT: movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT: movl %edi, %ebx
-; X86-NEXT: shrl %ebx
-; X86-NEXT: shldl $31, %eax, %edi
-; X86-NEXT: movl %ecx, %edx
-; X86-NEXT: shrl %edx
-; X86-NEXT: shldl $31, %esi, %ecx
-; X86-NEXT: addl %edi, %ecx
-; X86-NEXT: adcl %ebx, %edx
-; X86-NEXT: andl %esi, %eax
-; X86-NEXT: andl $1, %eax
-; X86-NEXT: addl %ecx, %eax
-; X86-NEXT: adcl $0, %edx
-; X86-NEXT: popl %esi
-; X86-NEXT: popl %edi
-; X86-NEXT: popl %ebx
+; X86-NEXT: addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT: adcl {{[0-9]+}}(%esp), %eax
+; X86-NEXT: setb %dl
+; X86-NEXT: movzbl %dl, %edx
+; X86-NEXT: shldl $31, %eax, %edx
+; X86-NEXT: shldl $31, %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: test_lsb_i64:
; X64: # %bb.0:
-; X64-NEXT: movq %rdi, %rcx
-; X64-NEXT: shrq %rcx
-; X64-NEXT: andl %esi, %edi
; X64-NEXT: movq %rsi, %rax
-; X64-NEXT: shrq %rax
-; X64-NEXT: addq %rcx, %rax
-; X64-NEXT: andl $1, %edi
-; X64-NEXT: addq %rdi, %rax
+; X64-NEXT: andq %rdi, %rax
+; X64-NEXT: xorq %rdi, %rsi
+; X64-NEXT: shrq %rsi
+; X64-NEXT: addq %rsi, %rax
; X64-NEXT: retq
%s0 = lshr i64 %a0, 1
%s1 = lshr i64 %a1, 1
``````````
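At the source level, a hypothetical example of the idiom this combine targets (function names are illustrative, mirroring the test_lsb_* IR above): each operand is halved and a carry-in is added when both low bits are set. With this patch the DAG combiner folds the sequence to a single AVGFLOORU/AVGFLOORS node, which then legalizes to the shorter sequences shown in the diffs.

```cpp
// Hypothetical source-level shape of the LSB averaging idiom; the
// names are illustrative and not taken from the patch.
#include <cstdint>

uint8_t avg_floor_u8(uint8_t A, uint8_t B) {
  return (A >> 1) + (B >> 1) + (A & B & 1); // matched to AVGFLOORU
}

// Signed variant; relies on arithmetic right shift of the promoted
// operands (guaranteed since C++20, universal in practice).
int8_t avg_floor_s8(int8_t A, int8_t B) {
  return (A >> 1) + (B >> 1) + (A & B & 1); // matched to AVGFLOORS
}
```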
https://github.com/llvm/llvm-project/pull/169644