[llvm] [DAG] Fold (and X, (bswap/bitreverse (not Y))) -> (and X, (not (bswap/bitreverse Y))) on ANDNOT capable targets (PR #112547)

Simon Pilgrim via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 25 10:12:33 PDT 2024


https://github.com/RKSimon updated https://github.com/llvm/llvm-project/pull/112547

From b8996f8664455955ff96671daffc899e0cbd3b6c Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Wed, 16 Oct 2024 14:59:00 +0100
Subject: [PATCH 1/3] [DAG] Fold (and X, (bswap/bitreverse (not Y))) -> (and X,
 (not (bswap/bitreverse Y))) on ANDNOT capable targets

Fixes #112425
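
For reference, this is the shape of IR the combine targets - a condensed
copy of the andnot_bswap_i64 test from the diff below:

define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind {
  %not = xor i64 %a1, -1                           ; not Y
  %bswap = tail call i64 @llvm.bswap.i64(i64 %not) ; bswap (not Y)
  %and = and i64 %bswap, %a0                       ; and X, (bswap (not Y))
  ret i64 %and
}

Moving the NOT after the BSWAP exposes the (and X, (not Y)) form that
hasAndNot() targets (e.g. BMI's ANDN) can select as a single instruction.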
---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  15 +
 llvm/test/CodeGen/X86/andnot-patterns.ll      | 439 ++++++++++++------
 2 files changed, 303 insertions(+), 151 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ad2d2ede302af8..ea31ce443d9598 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7353,6 +7353,21 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
     return R;
 
+  // If the target supports ANDNOT, attempt to reconstruct an ANDNOT pattern
+  // that might have become separated by a bitwise-agnostic instruction.
+  if (TLI.hasAndNot(SDValue(N, 0))) {
+    SDValue X, Y;
+
+    // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
+    // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
+    for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
+      if (sd_match(N, m_And(m_Value(X),
+                            m_OneUse(m_UnaryOp(Opc, m_Not(m_Value(Y)))))) &&
+          !sd_match(X, m_Not(m_Value())))
+        return DAG.getNode(ISD::AND, DL, VT, X,
+                           DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
+  }
+
   // Masking the negated extension of a boolean is just the zero-extended
   // boolean:
   // and (sub 0, zext(bool X)), 1 --> zext(bool X)
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index 101e4ed008f7b6..d287ca28f5b961 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -321,25 +321,41 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
 ;
 
 define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind {
-; X86-LABEL: andnot_bswap_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    notl %edx
-; X86-NEXT:    bswapl %edx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_bswap_i64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    notl %edx
+; X86-NOBMI-NEXT:    bswapl %edx
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_bswap_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    notq %rax
-; X64-NEXT:    bswapq %rax
-; X64-NEXT:    andq %rdi, %rax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_bswap_i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    bswapl %ecx
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bswap_i64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rsi, %rax
+; X64-NOBMI-NEXT:    notq %rax
+; X64-NOBMI-NEXT:    bswapq %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bswap_i64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    bswapq %rsi
+; X64-BMI-NEXT:    andnq %rdi, %rsi, %rax
+; X64-BMI-NEXT:    retq
   %not = xor i64 %a1, -1
   %bswap = tail call i64 @llvm.bswap.i64(i64 %not)
   %and = and i64 %bswap, %a0
@@ -347,21 +363,34 @@ define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind {
 }
 
 define i32 @andnot_bswap_i32(i32 %a0, i32 %a1) nounwind {
-; X86-LABEL: andnot_bswap_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_bswap_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_bswap_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    bswapl %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_bswap_i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bswap_i32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    bswapl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bswap_i32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    bswapl %esi
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    retq
   %not = xor i32 %a1, -1
   %bswap = tail call i32 @llvm.bswap.i32(i32 %not)
   %and = and i32 %bswap, %a0
@@ -397,75 +426,142 @@ define i16 @andnot_bswap_i16(i16 %a0, i16 %a1) nounwind {
 ;
 
 define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
-; X86-LABEL: andnot_bitreverse_i64:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    notl %ecx
-; X86-NEXT:    bswapl %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %edx
-; X86-NEXT:    shrl $4, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    orl %edx, %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $858993459, %edx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    leal (%ecx,%edx,4), %ecx
-; X86-NEXT:    movl %ecx, %edx
-; X86-NEXT:    andl $1431655765, %edx # imm = 0x55555555
-; X86-NEXT:    shrl %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    leal (%ecx,%edx,2), %edx
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_bitreverse_i64:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    bswapl %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    shll $4, %edx
+; X86-NOBMI-NEXT:    shrl $4, %ecx
+; X86-NOBMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    orl %edx, %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NOBMI-NEXT:    shrl $2, %ecx
+; X86-NOBMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NOBMI-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %edx
+; X86-NOBMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NOBMI-NEXT:    shrl %ecx
+; X86-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NOBMI-NEXT:    leal (%ecx,%edx,2), %edx
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    shll $4, %ecx
+; X86-NOBMI-NEXT:    shrl $4, %eax
+; X86-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    orl %ecx, %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NOBMI-NEXT:    shrl $2, %eax
+; X86-NOBMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NOBMI-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NOBMI-NEXT:    shrl %eax
+; X86-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NOBMI-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_bitreverse_i64:
-; X64:       # %bb.0:
-; X64-NEXT:    notq %rsi
-; X64-NEXT:    bswapq %rsi
-; X64-NEXT:    movq %rsi, %rax
-; X64-NEXT:    shrq $4, %rax
-; X64-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    andq %rcx, %rsi
-; X64-NEXT:    shlq $4, %rsi
-; X64-NEXT:    orq %rax, %rsi
-; X64-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
-; X64-NEXT:    movq %rsi, %rcx
-; X64-NEXT:    andq %rax, %rcx
-; X64-NEXT:    shrq $2, %rsi
-; X64-NEXT:    andq %rax, %rsi
-; X64-NEXT:    leaq (%rsi,%rcx,4), %rax
-; X64-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
-; X64-NEXT:    movq %rax, %rdx
-; X64-NEXT:    andq %rcx, %rdx
-; X64-NEXT:    shrq %rax
-; X64-NEXT:    andq %rcx, %rax
-; X64-NEXT:    leaq (%rax,%rdx,2), %rax
-; X64-NEXT:    andq %rdi, %rax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_bitreverse_i64:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    shll $4, %edx
+; X86-BMI-NEXT:    shrl $4, %eax
+; X86-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    orl %edx, %eax
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-BMI-NEXT:    shrl $2, %eax
+; X86-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-BMI-NEXT:    leal (%eax,%edx,4), %eax
+; X86-BMI-NEXT:    movl %eax, %edx
+; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT:    shrl %eax
+; X86-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%eax,%edx,2), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    bswapl %ecx
+; X86-BMI-NEXT:    movl %ecx, %edx
+; X86-BMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    shll $4, %edx
+; X86-BMI-NEXT:    shrl $4, %ecx
+; X86-BMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    orl %edx, %ecx
+; X86-BMI-NEXT:    movl %ecx, %edx
+; X86-BMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-BMI-NEXT:    shrl $2, %ecx
+; X86-BMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-BMI-NEXT:    leal (%ecx,%edx,4), %ecx
+; X86-BMI-NEXT:    movl %ecx, %edx
+; X86-BMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-BMI-NEXT:    shrl %ecx
+; X86-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%ecx,%edx,2), %ecx
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %ecx, %edx
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bitreverse_i64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    notq %rsi
+; X64-NOBMI-NEXT:    bswapq %rsi
+; X64-NOBMI-NEXT:    movq %rsi, %rax
+; X64-NOBMI-NEXT:    shrq $4, %rax
+; X64-NOBMI-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-NOBMI-NEXT:    andq %rcx, %rax
+; X64-NOBMI-NEXT:    andq %rcx, %rsi
+; X64-NOBMI-NEXT:    shlq $4, %rsi
+; X64-NOBMI-NEXT:    orq %rax, %rsi
+; X64-NOBMI-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-NOBMI-NEXT:    movq %rsi, %rcx
+; X64-NOBMI-NEXT:    andq %rax, %rcx
+; X64-NOBMI-NEXT:    shrq $2, %rsi
+; X64-NOBMI-NEXT:    andq %rax, %rsi
+; X64-NOBMI-NEXT:    leaq (%rsi,%rcx,4), %rax
+; X64-NOBMI-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-NOBMI-NEXT:    movq %rax, %rdx
+; X64-NOBMI-NEXT:    andq %rcx, %rdx
+; X64-NOBMI-NEXT:    shrq %rax
+; X64-NOBMI-NEXT:    andq %rcx, %rax
+; X64-NOBMI-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-NOBMI-NEXT:    andq %rdi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bitreverse_i64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    bswapq %rsi
+; X64-BMI-NEXT:    movq %rsi, %rax
+; X64-BMI-NEXT:    shrq $4, %rax
+; X64-BMI-NEXT:    movabsq $1085102592571150095, %rcx # imm = 0xF0F0F0F0F0F0F0F
+; X64-BMI-NEXT:    andq %rcx, %rax
+; X64-BMI-NEXT:    andq %rcx, %rsi
+; X64-BMI-NEXT:    shlq $4, %rsi
+; X64-BMI-NEXT:    orq %rax, %rsi
+; X64-BMI-NEXT:    movabsq $3689348814741910323, %rax # imm = 0x3333333333333333
+; X64-BMI-NEXT:    movq %rsi, %rcx
+; X64-BMI-NEXT:    andq %rax, %rcx
+; X64-BMI-NEXT:    shrq $2, %rsi
+; X64-BMI-NEXT:    andq %rax, %rsi
+; X64-BMI-NEXT:    leaq (%rsi,%rcx,4), %rax
+; X64-BMI-NEXT:    movabsq $6148914691236517205, %rcx # imm = 0x5555555555555555
+; X64-BMI-NEXT:    movq %rax, %rdx
+; X64-BMI-NEXT:    andq %rcx, %rdx
+; X64-BMI-NEXT:    shrq %rax
+; X64-BMI-NEXT:    andq %rcx, %rax
+; X64-BMI-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-BMI-NEXT:    andnq %rdi, %rax, %rax
+; X64-BMI-NEXT:    retq
   %not = xor i64 %a1, -1
   %bitrev = tail call i64 @llvm.bitreverse.i64(i64 %not)
   %and = and i64 %bitrev, %a0
@@ -473,53 +569,99 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 }
 
 define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
-; X86-LABEL: andnot_bitreverse_i32:
-; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
-; X86-NEXT:    bswapl %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NEXT:    shll $4, %ecx
-; X86-NEXT:    shrl $4, %eax
-; X86-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NEXT:    orl %ecx, %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NEXT:    shrl $2, %eax
-; X86-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NEXT:    movl %eax, %ecx
-; X86-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NEXT:    shrl %eax
-; X86-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    retl
+; X86-NOBMI-LABEL: andnot_bitreverse_i32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    shll $4, %ecx
+; X86-NOBMI-NEXT:    shrl $4, %eax
+; X86-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    orl %ecx, %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-NOBMI-NEXT:    shrl $2, %eax
+; X86-NOBMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NOBMI-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-NOBMI-NEXT:    movl %eax, %ecx
+; X86-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-NOBMI-NEXT:    shrl %eax
+; X86-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NOBMI-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
 ;
-; X64-LABEL: andnot_bitreverse_i32:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    notl %esi
-; X64-NEXT:    bswapl %esi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %esi
-; X64-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
-; X64-NEXT:    orl %eax, %esi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X64-NEXT:    shrl $2, %esi
-; X64-NEXT:    andl $858993459, %esi # imm = 0x33333333
-; X64-NEXT:    leal (%rsi,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    retq
+; X86-BMI-LABEL: andnot_bitreverse_i32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    shll $4, %ecx
+; X86-BMI-NEXT:    shrl $4, %eax
+; X86-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-BMI-NEXT:    orl %ecx, %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
+; X86-BMI-NEXT:    shrl $2, %eax
+; X86-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-BMI-NEXT:    leal (%eax,%ecx,4), %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X86-BMI-NEXT:    shrl %eax
+; X86-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-BMI-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bitreverse_i32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NOBMI-NEXT:    notl %esi
+; X64-NOBMI-NEXT:    bswapl %esi
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-NOBMI-NEXT:    shll $4, %eax
+; X64-NOBMI-NEXT:    shrl $4, %esi
+; X64-NOBMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-NOBMI-NEXT:    orl %eax, %esi
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-NOBMI-NEXT:    shrl $2, %esi
+; X64-NOBMI-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-NOBMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NOBMI-NEXT:    movl %eax, %ecx
+; X64-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-NOBMI-NEXT:    shrl %eax
+; X64-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-NOBMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bitreverse_i32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI-NEXT:    bswapl %esi
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X64-BMI-NEXT:    shll $4, %eax
+; X64-BMI-NEXT:    shrl $4, %esi
+; X64-BMI-NEXT:    andl $252645135, %esi # imm = 0xF0F0F0F
+; X64-BMI-NEXT:    orl %eax, %esi
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X64-BMI-NEXT:    shrl $2, %esi
+; X64-BMI-NEXT:    andl $858993459, %esi # imm = 0x33333333
+; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT:    movl %eax, %ecx
+; X64-BMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
+; X64-BMI-NEXT:    shrl %eax
+; X64-BMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI-NEXT:    retq
   %not = xor i32 %a1, -1
   %bitrev = tail call i32 @llvm.bitreverse.i32(i32 %not)
   %and = and i32 %bitrev, %a0
@@ -626,8 +768,3 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind {
   %and = and i8 %bitrev, %a0
   ret i8 %and
 }
-;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
-; X64-BMI: {{.*}}
-; X64-NOBMI: {{.*}}
-; X86-BMI: {{.*}}
-; X86-NOBMI: {{.*}}

From 9ba8f813aee7dc95728c6b26f337faf08f4ba4fd Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 18 Oct 2024 18:10:08 +0100
Subject: [PATCH 2/3] [X86] andnot-patterns.ll - add additional multiuse tests
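
These check that the one-use restrictions behave sensibly when the NOT or
the bswap/rotate result has a second use, e.g. the new
andnot_bswap_i32_multiuse_bswap test (use_i32 is declared in the test file):

define i32 @andnot_bswap_i32_multiuse_bswap(i32 %a0, i32 %a1) nounwind {
  %not = xor i32 %a1, -1
  %bswap = tail call i32 @llvm.bswap.i32(i32 %not)
  %and = and i32 %bswap, %a0
  call void @use_i32(i32 %bswap) ; second use of the bswap
  ret i32 %and
}

Here the bswap feeds both the AND and the call, so the m_OneUse guard in
the combine keeps the original instruction order.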

---
 llvm/test/CodeGen/X86/andnot-patterns.ll | 143 ++++++++++++++++++++++-
 1 file changed, 140 insertions(+), 3 deletions(-)

diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index d287ca28f5b961..c5eb365fb7db03 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -7,6 +7,7 @@
 ; TODO - PR112425 - attempt to reconstruct andnot patterns through bitwise-agnostic operations
 
 declare void @use_i64(i64)
+declare void @use_i32(i32)
 
 ;
 ; Fold (and X, (rotl (not Y), Z)) -> (and X, (not (rotl Y, Z)))
@@ -132,8 +133,8 @@ define i8 @andnot_rotl_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
   ret i8 %and
 }
 
-define i64 @andnot_rotl_i64_multiuse(i64 %a0, i64 %a1, i64 %a2) nounwind {
-; X86-LABEL: andnot_rotl_i64_multiuse:
+define i64 @andnot_rotl_i64_multiuse_rot(i64 %a0, i64 %a1, i64 %a2) nounwind {
+; X86-LABEL: andnot_rotl_i64_multiuse_rot:
 ; X86:       # %bb.0:
 ; X86-NEXT:    pushl %ebx
 ; X86-NEXT:    pushl %edi
@@ -171,7 +172,7 @@ define i64 @andnot_rotl_i64_multiuse(i64 %a0, i64 %a1, i64 %a2) nounwind {
 ; X86-NEXT:    popl %ebx
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_rotl_i64_multiuse:
+; X64-LABEL: andnot_rotl_i64_multiuse_rot:
 ; X64:       # %bb.0:
 ; X64-NEXT:    pushq %rbx
 ; X64-NEXT:    movq %rdx, %rcx
@@ -316,6 +317,44 @@ define i8 @andnot_rotr_i8(i8 %a0, i8 %a1, i8 %a2) nounwind {
   ret i8 %and
 }
 
+define i32 @andnot_rotr_i32_multiuse_not(i32 %a0, i32 %a1, i32 %a2) nounwind {
+; X86-LABEL: andnot_rotr_i32_multiuse_not:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    movl %eax, %esi
+; X86-NEXT:    rorl %cl, %esi
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll use_i32 at PLT
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: andnot_rotr_i32_multiuse_not:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movl %edx, %ecx
+; X64-NEXT:    notl %esi
+; X64-NEXT:    movl %esi, %ebx
+; X64-NEXT:    # kill: def $cl killed $cl killed $ecx
+; X64-NEXT:    rorl %cl, %ebx
+; X64-NEXT:    andl %edi, %ebx
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    callq use_i32 at PLT
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  %not = xor i32 %a1, -1
+  %rot = tail call i32 @llvm.fshr.i32(i32 %not, i32 %not, i32 %a2)
+  %and = and i32 %rot, %a0
+  call void @use_i32(i32 %not)
+  ret i32 %and
+}
+
 ;
 ; Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
 ;
@@ -421,6 +460,104 @@ define i16 @andnot_bswap_i16(i16 %a0, i16 %a1) nounwind {
   ret i16 %and
 }
 
+define i32 @andnot_bswap_i32_multiuse_bswap(i32 %a0, i32 %a1) nounwind {
+; X86-LABEL: andnot_bswap_i32_multiuse_bswap:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    notl %eax
+; X86-NEXT:    bswapl %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    andl %eax, %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll use_i32 at PLT
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: andnot_bswap_i32_multiuse_bswap:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    notl %esi
+; X64-NEXT:    bswapl %esi
+; X64-NEXT:    andl %esi, %ebx
+; X64-NEXT:    movl %esi, %edi
+; X64-NEXT:    callq use_i32 at PLT
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  %not = xor i32 %a1, -1
+  %bswap = tail call i32 @llvm.bswap.i32(i32 %not)
+  %and = and i32 %bswap, %a0
+  call void @use_i32(i32 %bswap)
+  ret i32 %and
+}
+
+define i32 @andnot_bswap_i32_multiuse_not(i32 %a0, i32 %a1) nounwind {
+; X86-NOBMI-LABEL: andnot_bswap_i32_multiuse_not:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    pushl %esi
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    notl %eax
+; X86-NOBMI-NEXT:    movl %eax, %esi
+; X86-NOBMI-NEXT:    bswapl %esi
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NOBMI-NEXT:    pushl %eax
+; X86-NOBMI-NEXT:    calll use_i32 at PLT
+; X86-NOBMI-NEXT:    addl $4, %esp
+; X86-NOBMI-NEXT:    movl %esi, %eax
+; X86-NOBMI-NEXT:    popl %esi
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: andnot_bswap_i32_multiuse_not:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    pushl %esi
+; X86-BMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    movl %eax, %ecx
+; X86-BMI-NEXT:    notl %ecx
+; X86-BMI-NEXT:    bswapl %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %esi
+; X86-BMI-NEXT:    pushl %ecx
+; X86-BMI-NEXT:    calll use_i32 at PLT
+; X86-BMI-NEXT:    addl $4, %esp
+; X86-BMI-NEXT:    movl %esi, %eax
+; X86-BMI-NEXT:    popl %esi
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: andnot_bswap_i32_multiuse_not:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    pushq %rbx
+; X64-NOBMI-NEXT:    notl %esi
+; X64-NOBMI-NEXT:    movl %esi, %ebx
+; X64-NOBMI-NEXT:    bswapl %ebx
+; X64-NOBMI-NEXT:    andl %edi, %ebx
+; X64-NOBMI-NEXT:    movl %esi, %edi
+; X64-NOBMI-NEXT:    callq use_i32 at PLT
+; X64-NOBMI-NEXT:    movl %ebx, %eax
+; X64-NOBMI-NEXT:    popq %rbx
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bswap_i32_multiuse_not:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    pushq %rbx
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    notl %eax
+; X64-BMI-NEXT:    bswapl %esi
+; X64-BMI-NEXT:    andnl %edi, %esi, %ebx
+; X64-BMI-NEXT:    movl %eax, %edi
+; X64-BMI-NEXT:    callq use_i32 at PLT
+; X64-BMI-NEXT:    movl %ebx, %eax
+; X64-BMI-NEXT:    popq %rbx
+; X64-BMI-NEXT:    retq
+  %not = xor i32 %a1, -1
+  %bswap = tail call i32 @llvm.bswap.i32(i32 %not)
+  %and = and i32 %bswap, %a0
+  call void @use_i32(i32 %not)
+  ret i32 %and
+}
+
 ;
 ; Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
 ;

From 77adbb325415b088e0f2eab2f9828f7b4856c6c1 Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 18 Oct 2024 18:12:21 +0100
Subject: [PATCH 3/3] Only require an ANDNOT-capable target when the NOT has
 multiple uses
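
The fold now applies unconditionally when the inverted value has a single
use, since reordering the NOT past the bswap/bitreverse is then neutral in
instruction count; TLI.hasAndNot() is only checked when the NOT has extra
uses, where the fold materialises an additional NOT node. The new guard is
(TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()), as seen in the
DAGCombiner hunk below.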

---
 llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp |  24 ++-
 llvm/test/CodeGen/X86/andnot-patterns.ll      | 163 +++++++++++-------
 2 files changed, 107 insertions(+), 80 deletions(-)

diff --git a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index ea31ce443d9598..b800204d917503 100644
--- a/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -7353,20 +7353,16 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
   if (SDValue R = foldLogicOfShifts(N, N1, N0, DAG))
     return R;
 
-  // If the target supports ANDNOT, attempt to reconstruct an ANDNOT pattern
-  // that might have become separated by a bitwise-agnostic instruction.
-  if (TLI.hasAndNot(SDValue(N, 0))) {
-    SDValue X, Y;
-
-    // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
-    // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
-    for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
-      if (sd_match(N, m_And(m_Value(X),
-                            m_OneUse(m_UnaryOp(Opc, m_Not(m_Value(Y)))))) &&
-          !sd_match(X, m_Not(m_Value())))
-        return DAG.getNode(ISD::AND, DL, VT, X,
-                           DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
-  }
+  // Fold (and X, (bswap (not Y))) -> (and X, (not (bswap Y)))
+  // Fold (and X, (bitreverse (not Y))) -> (and X, (not (bitreverse Y)))
+  SDValue X, Y, NotY;
+  for (unsigned Opc : {ISD::BSWAP, ISD::BITREVERSE})
+    if (sd_match(N,
+                 m_And(m_Value(X), m_OneUse(m_UnaryOp(Opc, m_Value(NotY))))) &&
+        sd_match(NotY, m_Not(m_Value(Y))) &&
+        (TLI.hasAndNot(SDValue(N, 0)) || NotY->hasOneUse()))
+      return DAG.getNode(ISD::AND, DL, VT, X,
+                         DAG.getNOT(DL, DAG.getNode(Opc, DL, VT, Y), VT));
 
   // Masking the negated extension of a boolean is just the zero-extended
   // boolean:
diff --git a/llvm/test/CodeGen/X86/andnot-patterns.ll b/llvm/test/CodeGen/X86/andnot-patterns.ll
index c5eb365fb7db03..1df29f0b12d1b6 100644
--- a/llvm/test/CodeGen/X86/andnot-patterns.ll
+++ b/llvm/test/CodeGen/X86/andnot-patterns.ll
@@ -364,11 +364,11 @@ define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    notl %eax
-; X86-NOBMI-NEXT:    notl %edx
-; X86-NOBMI-NEXT:    bswapl %edx
 ; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    bswapl %edx
+; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -385,8 +385,8 @@ define i64 @andnot_bswap_i64(i64 %a0, i64 %a1) nounwind {
 ; X64-NOBMI-LABEL: andnot_bswap_i64:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movq %rsi, %rax
-; X64-NOBMI-NEXT:    notq %rax
 ; X64-NOBMI-NEXT:    bswapq %rax
+; X64-NOBMI-NEXT:    notq %rax
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -405,8 +405,8 @@ define i32 @andnot_bswap_i32(i32 %a0, i32 %a1) nounwind {
 ; X86-NOBMI-LABEL: andnot_bswap_i32:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -420,8 +420,8 @@ define i32 @andnot_bswap_i32(i32 %a0, i32 %a1) nounwind {
 ; X64-NOBMI-LABEL: andnot_bswap_i32:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    movl %esi, %eax
-; X64-NOBMI-NEXT:    notl %eax
 ; X64-NOBMI-NEXT:    bswapl %eax
+; X64-NOBMI-NEXT:    notl %eax
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -439,21 +439,28 @@ define i32 @andnot_bswap_i32(i32 %a0, i32 %a1) nounwind {
 define i16 @andnot_bswap_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: andnot_bswap_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw $8, %ax
+; X86-NEXT:    notl %eax
 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_bswap_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    notl %eax
-; X64-NEXT:    rolw $8, %ax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-NOBMI-LABEL: andnot_bswap_i16:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    rolw $8, %ax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bswap_i16:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    rolw $8, %si
+; X64-BMI-NEXT:    andnl %edi, %esi, %eax
+; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1
   %bswap = tail call i16 @llvm.bswap.i16(i16 %not)
   %and = and i16 %bswap, %a0
@@ -567,8 +574,25 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    bswapl %eax
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    shll $4, %edx
+; X86-NOBMI-NEXT:    shrl $4, %eax
+; X86-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
+; X86-NOBMI-NEXT:    orl %edx, %eax
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    andl $858993459, %edx # imm = 0x33333333
+; X86-NOBMI-NEXT:    shrl $2, %eax
+; X86-NOBMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
+; X86-NOBMI-NEXT:    leal (%eax,%edx,4), %eax
+; X86-NOBMI-NEXT:    movl %eax, %edx
+; X86-NOBMI-NEXT:    andl $1431655765, %edx # imm = 0x55555555
+; X86-NOBMI-NEXT:    shrl %eax
+; X86-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
+; X86-NOBMI-NEXT:    leal (%eax,%edx,2), %eax
 ; X86-NOBMI-NEXT:    notl %eax
-; X86-NOBMI-NEXT:    notl %ecx
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    bswapl %ecx
 ; X86-NOBMI-NEXT:    movl %ecx, %edx
 ; X86-NOBMI-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
@@ -586,24 +610,7 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X86-NOBMI-NEXT:    shrl %ecx
 ; X86-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
 ; X86-NOBMI-NEXT:    leal (%ecx,%edx,2), %edx
-; X86-NOBMI-NEXT:    bswapl %eax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
-; X86-NOBMI-NEXT:    shll $4, %ecx
-; X86-NOBMI-NEXT:    shrl $4, %eax
-; X86-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
-; X86-NOBMI-NEXT:    orl %ecx, %eax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    andl $858993459, %ecx # imm = 0x33333333
-; X86-NOBMI-NEXT:    shrl $2, %eax
-; X86-NOBMI-NEXT:    andl $858993459, %eax # imm = 0x33333333
-; X86-NOBMI-NEXT:    leal (%eax,%ecx,4), %eax
-; X86-NOBMI-NEXT:    movl %eax, %ecx
-; X86-NOBMI-NEXT:    andl $1431655765, %ecx # imm = 0x55555555
-; X86-NOBMI-NEXT:    shrl %eax
-; X86-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
-; X86-NOBMI-NEXT:    leal (%eax,%ecx,2), %eax
-; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    notl %edx
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %edx
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -651,7 +658,6 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ;
 ; X64-NOBMI-LABEL: andnot_bitreverse_i64:
 ; X64-NOBMI:       # %bb.0:
-; X64-NOBMI-NEXT:    notq %rsi
 ; X64-NOBMI-NEXT:    bswapq %rsi
 ; X64-NOBMI-NEXT:    movq %rsi, %rax
 ; X64-NOBMI-NEXT:    shrq $4, %rax
@@ -672,6 +678,7 @@ define i64 @andnot_bitreverse_i64(i64 %a0, i64 %a1) nounwind {
 ; X64-NOBMI-NEXT:    shrq %rax
 ; X64-NOBMI-NEXT:    andq %rcx, %rax
 ; X64-NOBMI-NEXT:    leaq (%rax,%rdx,2), %rax
+; X64-NOBMI-NEXT:    notq %rax
 ; X64-NOBMI-NEXT:    andq %rdi, %rax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -709,7 +716,6 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 ; X86-NOBMI-LABEL: andnot_bitreverse_i32:
 ; X86-NOBMI:       # %bb.0:
 ; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    bswapl %eax
 ; X86-NOBMI-NEXT:    movl %eax, %ecx
 ; X86-NOBMI-NEXT:    andl $252645135, %ecx # imm = 0xF0F0F0F
@@ -727,6 +733,7 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 ; X86-NOBMI-NEXT:    shrl %eax
 ; X86-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
 ; X86-NOBMI-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NOBMI-NEXT:    notl %eax
 ; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
 ; X86-NOBMI-NEXT:    retl
 ;
@@ -756,7 +763,6 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 ; X64-NOBMI-LABEL: andnot_bitreverse_i32:
 ; X64-NOBMI:       # %bb.0:
 ; X64-NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NOBMI-NEXT:    notl %esi
 ; X64-NOBMI-NEXT:    bswapl %esi
 ; X64-NOBMI-NEXT:    movl %esi, %eax
 ; X64-NOBMI-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
@@ -774,6 +780,7 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 ; X64-NOBMI-NEXT:    shrl %eax
 ; X64-NOBMI-NEXT:    andl $1431655765, %eax # imm = 0x55555555
 ; X64-NOBMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NOBMI-NEXT:    notl %eax
 ; X64-NOBMI-NEXT:    andl %edi, %eax
 ; X64-NOBMI-NEXT:    retq
 ;
@@ -808,8 +815,7 @@ define i32 @andnot_bitreverse_i32(i32 %a0, i32 %a1) nounwind {
 define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-LABEL: andnot_bitreverse_i16:
 ; X86:       # %bb.0:
-; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notl %eax
+; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
 ; X86-NEXT:    rolw $8, %ax
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    andl $3855, %ecx # imm = 0xF0F
@@ -827,34 +833,59 @@ define i16 @andnot_bitreverse_i16(i16 %a0, i16 %a1) nounwind {
 ; X86-NEXT:    shrl %eax
 ; X86-NEXT:    andl $21845, %eax # imm = 0x5555
 ; X86-NEXT:    leal (%eax,%ecx,2), %eax
+; X86-NEXT:    notl %eax
 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
 ; X86-NEXT:    retl
 ;
-; X64-LABEL: andnot_bitreverse_i16:
-; X64:       # %bb.0:
-; X64-NEXT:    # kill: def $esi killed $esi def $rsi
-; X64-NEXT:    notl %esi
-; X64-NEXT:    rolw $8, %si
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $3855, %eax # imm = 0xF0F
-; X64-NEXT:    shll $4, %eax
-; X64-NEXT:    shrl $4, %esi
-; X64-NEXT:    andl $3855, %esi # imm = 0xF0F
-; X64-NEXT:    orl %eax, %esi
-; X64-NEXT:    movl %esi, %eax
-; X64-NEXT:    andl $13107, %eax # imm = 0x3333
-; X64-NEXT:    shrl $2, %esi
-; X64-NEXT:    andl $13107, %esi # imm = 0x3333
-; X64-NEXT:    leal (%rsi,%rax,4), %eax
-; X64-NEXT:    movl %eax, %ecx
-; X64-NEXT:    andl $21845, %ecx # imm = 0x5555
-; X64-NEXT:    shrl %eax
-; X64-NEXT:    andl $21845, %eax # imm = 0x5555
-; X64-NEXT:    leal (%rax,%rcx,2), %eax
-; X64-NEXT:    andl %edi, %eax
-; X64-NEXT:    # kill: def $ax killed $ax killed $eax
-; X64-NEXT:    retq
+; X64-NOBMI-LABEL: andnot_bitreverse_i16:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-NOBMI-NEXT:    rolw $8, %si
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-NOBMI-NEXT:    shll $4, %eax
+; X64-NOBMI-NEXT:    shrl $4, %esi
+; X64-NOBMI-NEXT:    andl $3855, %esi # imm = 0xF0F
+; X64-NOBMI-NEXT:    orl %eax, %esi
+; X64-NOBMI-NEXT:    movl %esi, %eax
+; X64-NOBMI-NEXT:    andl $13107, %eax # imm = 0x3333
+; X64-NOBMI-NEXT:    shrl $2, %esi
+; X64-NOBMI-NEXT:    andl $13107, %esi # imm = 0x3333
+; X64-NOBMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-NOBMI-NEXT:    movl %eax, %ecx
+; X64-NOBMI-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X64-NOBMI-NEXT:    shrl %eax
+; X64-NOBMI-NEXT:    andl $21845, %eax # imm = 0x5555
+; X64-NOBMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-NOBMI-NEXT:    notl %eax
+; X64-NOBMI-NEXT:    andl %edi, %eax
+; X64-NOBMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: andnot_bitreverse_i16:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    # kill: def $esi killed $esi def $rsi
+; X64-BMI-NEXT:    rolw $8, %si
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    andl $3855, %eax # imm = 0xF0F
+; X64-BMI-NEXT:    shll $4, %eax
+; X64-BMI-NEXT:    shrl $4, %esi
+; X64-BMI-NEXT:    andl $3855, %esi # imm = 0xF0F
+; X64-BMI-NEXT:    orl %eax, %esi
+; X64-BMI-NEXT:    movl %esi, %eax
+; X64-BMI-NEXT:    andl $13107, %eax # imm = 0x3333
+; X64-BMI-NEXT:    shrl $2, %esi
+; X64-BMI-NEXT:    andl $13107, %esi # imm = 0x3333
+; X64-BMI-NEXT:    leal (%rsi,%rax,4), %eax
+; X64-BMI-NEXT:    movl %eax, %ecx
+; X64-BMI-NEXT:    andl $21845, %ecx # imm = 0x5555
+; X64-BMI-NEXT:    shrl %eax
+; X64-BMI-NEXT:    andl $21845, %eax # imm = 0x5555
+; X64-BMI-NEXT:    leal (%rax,%rcx,2), %eax
+; X64-BMI-NEXT:    andnl %edi, %eax, %eax
+; X64-BMI-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-BMI-NEXT:    retq
   %not = xor i16 %a1, -1
   %bitrev = tail call i16 @llvm.bitreverse.i16(i16 %not)
   %and = and i16 %bitrev, %a0
@@ -865,7 +896,6 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-LABEL: andnot_bitreverse_i8:
 ; X86:       # %bb.0:
 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax
-; X86-NEXT:    notb %al
 ; X86-NEXT:    rolb $4, %al
 ; X86-NEXT:    movl %eax, %ecx
 ; X86-NEXT:    andb $51, %cl
@@ -879,12 +909,12 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind {
 ; X86-NEXT:    shrb %al
 ; X86-NEXT:    andb $85, %al
 ; X86-NEXT:    orb %cl, %al
+; X86-NEXT:    notb %al
 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al
 ; X86-NEXT:    retl
 ;
 ; X64-LABEL: andnot_bitreverse_i8:
 ; X64:       # %bb.0:
-; X64-NEXT:    notb %sil
 ; X64-NEXT:    rolb $4, %sil
 ; X64-NEXT:    movl %esi, %eax
 ; X64-NEXT:    andb $51, %al
@@ -898,6 +928,7 @@ define i8 @andnot_bitreverse_i8(i8 %a0, i8 %a1) nounwind {
 ; X64-NEXT:    shrb %al
 ; X64-NEXT:    andb $85, %al
 ; X64-NEXT:    orb %cl, %al
+; X64-NEXT:    notb %al
 ; X64-NEXT:    andb %dil, %al
 ; X64-NEXT:    retq
   %not = xor i8 %a1, -1


