[llvm] [DAG] Recognise AVGFLOOR (((A >> 1) + (B >> 1)) + (A & B & 1)) patterns (PR #169644)

via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 26 04:14:59 PST 2025


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-x86

Author: Simon Pilgrim (RKSimon)

<details>
<summary>Changes</summary>

Recognise 'LSB' style AVGFLOOR patterns.

I attempted to use the m_Reassociatable* pattern matchers, but encountered an issue: we can't correctly match m_Value/m_Deferred pairs within the same reassociation, as there appear to be no guarantees on the order in which operands are matched. I'll raise a bug for this; in the meantime, the test_lsb_i32 tests retain the unmatched pattern to show the missed matching opportunity.

Fixes #<!-- -->53648

---
Full diff: https://github.com/llvm/llvm-project/pull/169644.diff


3 Files Affected:

- (modified) llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp (+16-4) 
- (modified) llvm/test/CodeGen/X86/avgfloors-scalar.ll (+27-47) 
- (modified) llvm/test/CodeGen/X86/avgflooru-scalar.ll (+22-54) 


``````````diff
diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 6b79dbb46cadc..813cbeafeaec9 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -3154,19 +3154,31 @@ SDValue DAGCombiner::visitADDLike(SDNode *N) {
 }
 
 // Attempt to form avgfloor(A, B) from (A & B) + ((A ^ B) >> 1)
+// Attempt to form avgfloor(A, B) from ((A >> 1) + (B >> 1)) + (A & B & 1)
 SDValue DAGCombiner::foldAddToAvg(SDNode *N, const SDLoc &DL) {
   SDValue N0 = N->getOperand(0);
   EVT VT = N0.getValueType();
   SDValue A, B;
 
+  // FIXME: m_ReassociatableAdd can't handle m_Value/m_Deferred mixing.
   if ((!LegalOperations || hasOperation(ISD::AVGFLOORU, VT)) &&
-      sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
-                        m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
+      (sd_match(N,
+                m_Add(m_And(m_Value(A), m_Value(B)),
+                      m_Srl(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
+       sd_match(N, m_Add(m_Add(m_Srl(m_Value(A), m_One()),
+                               m_Srl(m_Value(B), m_One())),
+                         m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+                                             m_One()))))) {
     return DAG.getNode(ISD::AVGFLOORU, DL, VT, A, B);
   }
   if ((!LegalOperations || hasOperation(ISD::AVGFLOORS, VT)) &&
-      sd_match(N, m_Add(m_And(m_Value(A), m_Value(B)),
-                        m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One())))) {
+      (sd_match(N,
+                m_Add(m_And(m_Value(A), m_Value(B)),
+                      m_Sra(m_Xor(m_Deferred(A), m_Deferred(B)), m_One()))) ||
+       sd_match(N, m_Add(m_Add(m_Sra(m_Value(A), m_One()),
+                               m_Sra(m_Value(B), m_One())),
+                         m_ReassociatableAnd(m_Deferred(A), m_Deferred(B),
+                                             m_One()))))) {
     return DAG.getNode(ISD::AVGFLOORS, DL, VT, A, B);
   }
 
diff --git a/llvm/test/CodeGen/X86/avgfloors-scalar.ll b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
index fd303192e6c50..c8bbc875834d1 100644
--- a/llvm/test/CodeGen/X86/avgfloors-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgfloors-scalar.ll
@@ -38,26 +38,20 @@ define i8 @test_fixed_i8(i8 %a0, i8 %a1) nounwind {
 define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: test_lsb_i8:
 ; X86:       # %bb.0:
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    sarb %dl
-; X86-NEXT:    andb %cl, %al
-; X86-NEXT:    sarb %cl
-; X86-NEXT:    addb %dl, %cl
-; X86-NEXT:    andb $1, %al
-; X86-NEXT:    addb %cl, %al
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movsbl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_lsb_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    sarb %al
-; X64-NEXT:    andb %sil, %dil
-; X64-NEXT:    sarb %sil
-; X64-NEXT:    addb %sil, %al
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    movsbl %sil, %ecx
+; X64-NEXT:    movsbl %dil, %eax
+; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %s0 = ashr i8 %a0, 1
   %s1 = ashr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movswl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    sarl %edx
-; X86-NEXT:    andl %ecx, %eax
-; X86-NEXT:    sarl %ecx
-; X86-NEXT:    addl %edx, %ecx
-; X86-NEXT:    andl $1, %eax
 ; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_lsb_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movswl %si, %eax
-; X64-NEXT:    movswl %di, %ecx
-; X64-NEXT:    sarl %ecx
-; X64-NEXT:    sarl %eax
+; X64-NEXT:    movswl %si, %ecx
+; X64-NEXT:    movswl %di, %eax
 ; X64-NEXT:    addl %ecx, %eax
-; X64-NEXT:    andl %esi, %edi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %s0 = ashr i16 %a0, 1
@@ -316,21 +301,19 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-NEXT:    pushl %edi
 ; X86-NEXT:    pushl %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %edi, %ebx
-; X86-NEXT:    sarl %ebx
-; X86-NEXT:    shldl $31, %eax, %edi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %eax, %ebx
+; X86-NEXT:    xorl %esi, %ebx
 ; X86-NEXT:    movl %ecx, %edx
+; X86-NEXT:    xorl %edi, %edx
+; X86-NEXT:    shrdl $1, %edx, %ebx
+; X86-NEXT:    andl %edi, %ecx
 ; X86-NEXT:    sarl %edx
-; X86-NEXT:    shldl $31, %esi, %ecx
-; X86-NEXT:    addl %edi, %ecx
-; X86-NEXT:    adcl %ebx, %edx
 ; X86-NEXT:    andl %esi, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    adcl $0, %edx
+; X86-NEXT:    addl %ebx, %eax
+; X86-NEXT:    adcl %ecx, %edx
 ; X86-NEXT:    popl %esi
 ; X86-NEXT:    popl %edi
 ; X86-NEXT:    popl %ebx
@@ -338,14 +321,11 @@ define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
 ;
 ; X64-LABEL: test_lsb_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rcx
-; X64-NEXT:    sarq %rcx
-; X64-NEXT:    andl %esi, %edi
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    sarq %rax
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    andq %rdi, %rax
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    sarq %rsi
+; X64-NEXT:    addq %rsi, %rax
 ; X64-NEXT:    retq
   %s0 = ashr i64 %a0, 1
   %s1 = ashr i64 %a1, 1
diff --git a/llvm/test/CodeGen/X86/avgflooru-scalar.ll b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
index 9ae4492bb4cd4..7ad10164ad484 100644
--- a/llvm/test/CodeGen/X86/avgflooru-scalar.ll
+++ b/llvm/test/CodeGen/X86/avgflooru-scalar.ll
@@ -40,24 +40,18 @@ define i8 @test_lsb_i8(i8 %a0, i8 %a1) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    shrb %dl
-; X86-NEXT:    andb %cl, %al
-; X86-NEXT:    shrb %cl
-; X86-NEXT:    addb %dl, %cl
-; X86-NEXT:    andb $1, %al
-; X86-NEXT:    addb %cl, %al
+; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    shrl %eax
+; X86-NEXT:    # kill: def $al killed $al killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_lsb_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    movl %edi, %eax
-; X64-NEXT:    shrb %al
-; X64-NEXT:    andb %sil, %dil
-; X64-NEXT:    shrb %sil
-; X64-NEXT:    addb %sil, %al
-; X64-NEXT:    andb $1, %dil
-; X64-NEXT:    addb %dil, %al
+; X64-NEXT:    movzbl %sil, %ecx
+; X64-NEXT:    movzbl %dil, %eax
+; X64-NEXT:    addl %ecx, %eax
+; X64-NEXT:    shrl %eax
+; X64-NEXT:    # kill: def $al killed $al killed $eax
 ; X64-NEXT:    retq
   %s0 = lshr i8 %a0, 1
   %s1 = lshr i8 %a1, 1
@@ -124,26 +118,17 @@ define i16 @test_lsb_i16(i16 %a0, i16 %a1) nounwind {
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl %eax, %edx
-; X86-NEXT:    shrl %edx
-; X86-NEXT:    andl %ecx, %eax
-; X86-NEXT:    shrl %ecx
-; X86-NEXT:    addl %edx, %ecx
-; X86-NEXT:    andl $1, %eax
 ; X86-NEXT:    addl %ecx, %eax
+; X86-NEXT:    shrl %eax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_lsb_i16:
 ; X64:       # %bb.0:
-; X64-NEXT:    movzwl %si, %eax
-; X64-NEXT:    movzwl %di, %ecx
-; X64-NEXT:    shrl %ecx
-; X64-NEXT:    shrl %eax
+; X64-NEXT:    movzwl %si, %ecx
+; X64-NEXT:    movzwl %di, %eax
 ; X64-NEXT:    addl %ecx, %eax
-; X64-NEXT:    andl %esi, %edi
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    addl %edi, %eax
+; X64-NEXT:    shrl %eax
 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X64-NEXT:    retq
   %s0 = lshr i16 %a0, 1
@@ -300,40 +285,23 @@ define i64 @test_fixed_i64(i64 %a0, i64 %a1) nounwind {
 define i64 @test_lsb_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-LABEL: test_lsb_i64:
 ; X86:       # %bb.0:
-; X86-NEXT:    pushl %ebx
-; X86-NEXT:    pushl %edi
-; X86-NEXT:    pushl %esi
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
-; X86-NEXT:    movl %edi, %ebx
-; X86-NEXT:    shrl %ebx
-; X86-NEXT:    shldl $31, %eax, %edi
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    shrl %edx
-; X86-NEXT:    shldl $31, %esi, %ecx
-; X86-NEXT:    addl %edi, %ecx
-; X86-NEXT:    adcl %ebx, %edx
-; X86-NEXT:    andl %esi, %eax
-; X86-NEXT:    andl $1, %eax
-; X86-NEXT:    addl %ecx, %eax
-; X86-NEXT:    adcl $0, %edx
-; X86-NEXT:    popl %esi
-; X86-NEXT:    popl %edi
-; X86-NEXT:    popl %ebx
+; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    adcl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    setb %dl
+; X86-NEXT:    movzbl %dl, %edx
+; X86-NEXT:    shldl $31, %eax, %edx
+; X86-NEXT:    shldl $31, %ecx, %eax
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: test_lsb_i64:
 ; X64:       # %bb.0:
-; X64-NEXT:    movq %rdi, %rcx
-; X64-NEXT:    shrq %rcx
-; X64-NEXT:    andl %esi, %edi
 ; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    addq %rcx, %rax
-; X64-NEXT:    andl $1, %edi
-; X64-NEXT:    addq %rdi, %rax
+; X64-NEXT:    andq %rdi, %rax
+; X64-NEXT:    xorq %rdi, %rsi
+; X64-NEXT:    shrq %rsi
+; X64-NEXT:    addq %rsi, %rax
 ; X64-NEXT:    retq
   %s0 = lshr i64 %a0, 1
   %s1 = lshr i64 %a1, 1

``````````

</details>


https://github.com/llvm/llvm-project/pull/169644


More information about the llvm-commits mailing list