[llvm] 8bea511 - [X86] Fold AND(Y, XOR(X, SUB(0, X))) to ANDN(Y, BLSMSK(X)) (#128348)

via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 25 09:11:06 PST 2025


Author: Marius Kamp
Date: 2025-02-25T17:11:00Z
New Revision: 8bea51103000e4ac752ecd8ed1550c1c9d105a6b

URL: https://github.com/llvm/llvm-project/commit/8bea51103000e4ac752ecd8ed1550c1c9d105a6b
DIFF: https://github.com/llvm/llvm-project/commit/8bea51103000e4ac752ecd8ed1550c1c9d105a6b.diff

LOG: [X86] Fold AND(Y, XOR(X, SUB(0, X))) to ANDN(Y, BLSMSK(X)) (#128348)

XOR(X, SUB(0, X)) is the bitwise negation of BLSMSK(X), i.e. of
x ^ (x - 1). On its own, this transformation is probably not
profitable, but when the XOR is an operand of an AND we can use an
ANDN instruction to reduce the number of emitted instructions by one.

Fixes #103501.
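
For reference, the identity behind the fold is x ^ (0 - x) == ~(x ^ (x - 1))
for every x (including 0), so AND'ing Y with XOR(X, NEG(X)) is the same as
ANDN'ing Y with BLSMSK(X). A minimal standalone C++ check of that identity
(not part of this patch) is:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Exhaustively verify x ^ -x == ~(x ^ (x - 1)) over all 16-bit values;
      // the identity is width-independent, so it carries over to i32/i64.
      for (uint32_t I = 0; I <= 0xFFFF; ++I) {
        uint16_t X = static_cast<uint16_t>(I);
        uint16_t NegMask = X ^ static_cast<uint16_t>(0 - X);  // XOR(X, SUB(0, X))
        uint16_t Blsmsk = X ^ static_cast<uint16_t>(X - 1);   // BLSMSK(X)
        // Hence AND(Y, NegMask) == ANDN(Y, Blsmsk) for any Y.
        assert(NegMask == static_cast<uint16_t>(~Blsmsk));
      }
      return 0;
    }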

Added: 
    llvm/test/CodeGen/X86/andnot-blsmsk.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 466634ebfacd0..2e88a27bbd2c6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -51064,6 +51064,31 @@ static SDValue combineBMILogicOp(SDNode *N, SelectionDAG &DAG,
   return SDValue();
 }
 
+/// Fold AND(Y, XOR(X, NEG(X))) -> ANDN(Y, BLSMSK(X)) if BMI is available.
+static SDValue combineAndXorSubWithBMI(SDNode *And, const SDLoc &DL,
+                                       SelectionDAG &DAG,
+                                       const X86Subtarget &Subtarget) {
+  using namespace llvm::SDPatternMatch;
+
+  EVT VT = And->getValueType(0);
+  // Make sure this node is a candidate for BMI instructions.
+  if (!Subtarget.hasBMI() || (VT != MVT::i32 && VT != MVT::i64))
+    return SDValue();
+
+  SDValue X;
+  SDValue Y;
+  if (!sd_match(And, m_And(m_OneUse(m_Xor(m_Value(X),
+                                          m_OneUse(m_Neg(m_Deferred(X))))),
+                           m_Value(Y))))
+    return SDValue();
+
+  SDValue BLSMSK =
+      DAG.getNode(ISD::XOR, DL, VT, X,
+                  DAG.getNode(ISD::SUB, DL, VT, X, DAG.getConstant(1, DL, VT)));
+  SDValue AndN = DAG.getNode(ISD::AND, DL, VT, Y, DAG.getNOT(DL, BLSMSK, VT));
+  return AndN;
+}
+
 static SDValue combineX86SubCmpForFlags(SDNode *N, SDValue Flag,
                                         SelectionDAG &DAG,
                                         TargetLowering::DAGCombinerInfo &DCI,
@@ -51472,6 +51497,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineBMILogicOp(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineAndXorSubWithBMI(N, dl, DAG, Subtarget))
+    return R;
+
   return SDValue();
 }
 

diff --git a/llvm/test/CodeGen/X86/andnot-blsmsk.ll b/llvm/test/CodeGen/X86/andnot-blsmsk.ll
new file mode 100644
index 0000000000000..74766821f6ce7
--- /dev/null
+++ b/llvm/test/CodeGen/X86/andnot-blsmsk.ll
@@ -0,0 +1,327 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=i686-- -mattr=-bmi,+sse2 | FileCheck %s --check-prefixes=X86,X86-NOBMI
+; RUN: llc < %s -mtriple=i686-- -mattr=+bmi,+sse2 | FileCheck %s --check-prefixes=X86,X86-BMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=-bmi | FileCheck %s --check-prefixes=X64,X64-NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mattr=+bmi | FileCheck %s --check-prefixes=X64,X64-BMI
+
+declare void @use(i32)
+
+define i32 @fold_and_xor_neg_v1_32(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: fold_and_xor_neg_v1_32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    negl %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: fold_and_xor_neg_v1_32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v1_32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    negl %eax
+; X64-NOBMI-NEXT:    xorl %edi, %eax
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v1_32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskl %edi, %eax
+; X64-BMI-NEXT:    andnl %esi, %eax, %eax
+; X64-BMI-NEXT:    retq
+  %neg = sub i32 0, %x
+  %xor = xor i32 %x, %neg
+  %and = and i32 %xor, %y
+  ret i32 %and
+}
+
+define i32 @fold_and_xor_neg_v2_32(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: fold_and_xor_neg_v2_32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    negl %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: fold_and_xor_neg_v2_32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v2_32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    negl %eax
+; X64-NOBMI-NEXT:    xorl %edi, %eax
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v2_32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskl %edi, %eax
+; X64-BMI-NEXT:    andnl %esi, %eax, %eax
+; X64-BMI-NEXT:    retq
+  %neg = sub i32 0, %x
+  %xor = xor i32 %x, %neg
+  %and = and i32 %y, %xor
+  ret i32 %and
+}
+
+define i32 @fold_and_xor_neg_v3_32(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: fold_and_xor_neg_v3_32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    negl %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: fold_and_xor_neg_v3_32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v3_32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    negl %eax
+; X64-NOBMI-NEXT:    xorl %edi, %eax
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v3_32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskl %edi, %eax
+; X64-BMI-NEXT:    andnl %esi, %eax, %eax
+; X64-BMI-NEXT:    retq
+  %neg = sub i32 0, %x
+  %xor = xor i32 %neg, %x
+  %and = and i32 %xor, %y
+  ret i32 %and
+}
+
+define i32 @fold_and_xor_neg_v4_32(i32 %x, i32 %y) nounwind {
+; X86-NOBMI-LABEL: fold_and_xor_neg_v4_32:
+; X86-NOBMI:       # %bb.0:
+; X86-NOBMI-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NOBMI-NEXT:    movl %ecx, %eax
+; X86-NOBMI-NEXT:    negl %eax
+; X86-NOBMI-NEXT:    xorl %ecx, %eax
+; X86-NOBMI-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NOBMI-NEXT:    retl
+;
+; X86-BMI-LABEL: fold_and_xor_neg_v4_32:
+; X86-BMI:       # %bb.0:
+; X86-BMI-NEXT:    blsmskl {{[0-9]+}}(%esp), %eax
+; X86-BMI-NEXT:    andnl {{[0-9]+}}(%esp), %eax, %eax
+; X86-BMI-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v4_32:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movl %edi, %eax
+; X64-NOBMI-NEXT:    negl %eax
+; X64-NOBMI-NEXT:    xorl %edi, %eax
+; X64-NOBMI-NEXT:    andl %esi, %eax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v4_32:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskl %edi, %eax
+; X64-BMI-NEXT:    andnl %esi, %eax, %eax
+; X64-BMI-NEXT:    retq
+  %neg = sub i32 0, %x
+  %xor = xor i32 %neg, %x
+  %and = and i32 %y, %xor
+  ret i32 %and
+}
+
+define i64 @fold_and_xor_neg_v1_64(i64 %x, i64 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    xorl %edx, %edx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    xorl %esi, %edx
+; X86-NEXT:    xorl %ecx, %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-NOBMI-LABEL: fold_and_xor_neg_v1_64:
+; X64-NOBMI:       # %bb.0:
+; X64-NOBMI-NEXT:    movq %rdi, %rax
+; X64-NOBMI-NEXT:    negq %rax
+; X64-NOBMI-NEXT:    xorq %rdi, %rax
+; X64-NOBMI-NEXT:    andq %rsi, %rax
+; X64-NOBMI-NEXT:    retq
+;
+; X64-BMI-LABEL: fold_and_xor_neg_v1_64:
+; X64-BMI:       # %bb.0:
+; X64-BMI-NEXT:    blsmskq %rdi, %rax
+; X64-BMI-NEXT:    andnq %rsi, %rax, %rax
+; X64-BMI-NEXT:    retq
+  %neg = sub i64 0, %x
+  %xor = xor i64 %x, %neg
+  %and = and i64 %xor, %y
+  ret i64 %and
+}
+
+; Negative test
+define i16 @fold_and_xor_neg_v1_16_negative(i16 %x, i16 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_16_negative:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl %ecx, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    xorl %ecx, %eax
+; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax
+; X86-NEXT:    # kill: def $ax killed $ax killed $eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_16_negative:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    xorl %edi, %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    # kill: def $ax killed $ax killed $eax
+; X64-NEXT:    retq
+  %neg = sub i16 0, %x
+  %xor = xor i16 %x, %neg
+  %and = and i16 %xor, %y
+  ret i16 %and
+}
+
+; Negative test
+define <4 x i32> @fold_and_xor_neg_v1_v4x32_negative(<4 x i32> %x, <4 x i32> %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_v4x32_negative:
+; X86:       # %bb.0:
+; X86-NEXT:    pxor %xmm2, %xmm2
+; X86-NEXT:    psubd %xmm0, %xmm2
+; X86-NEXT:    pxor %xmm2, %xmm0
+; X86-NEXT:    pand %xmm1, %xmm0
+; X86-NEXT:    retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_v4x32_negative:
+; X64:       # %bb.0:
+; X64-NEXT:    pxor %xmm2, %xmm2
+; X64-NEXT:    psubd %xmm0, %xmm2
+; X64-NEXT:    pxor %xmm2, %xmm0
+; X64-NEXT:    pand %xmm1, %xmm0
+; X64-NEXT:    retq
+  %neg = sub <4 x i32> zeroinitializer, %x
+  %xor = xor <4 x i32> %x, %neg
+  %and = and <4 x i32> %xor, %y
+  ret <4 x i32> %and
+}
+
+; Negative test
+define i32 @fold_and_xor_neg_v1_32_two_uses_xor_negative(i32 %x, i32 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_32_two_uses_xor_negative:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl %eax, %ecx
+; X86-NEXT:    negl %ecx
+; X86-NEXT:    xorl %eax, %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    andl %ecx, %esi
+; X86-NEXT:    pushl %ecx
+; X86-NEXT:    calll use@PLT
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_32_two_uses_xor_negative:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movl %esi, %ebx
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    xorl %eax, %edi
+; X64-NEXT:    andl %edi, %ebx
+; X64-NEXT:    callq use@PLT
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  %neg = sub i32 0, %x
+  %xor = xor i32 %x, %neg
+  %and = and i32 %xor, %y
+  call void @use(i32 %xor)
+  ret i32 %and
+}
+
+; Negative test
+define i32 @fold_and_xor_neg_v1_32_two_uses_sub_negative(i32 %x, i32 %y) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_32_two_uses_sub_negative:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    negl %eax
+; X86-NEXT:    xorl %eax, %esi
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    pushl %eax
+; X86-NEXT:    calll use@PLT
+; X86-NEXT:    addl $4, %esp
+; X86-NEXT:    movl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_32_two_uses_sub_negative:
+; X64:       # %bb.0:
+; X64-NEXT:    pushq %rbx
+; X64-NEXT:    movl %edi, %ebx
+; X64-NEXT:    negl %edi
+; X64-NEXT:    xorl %edi, %ebx
+; X64-NEXT:    andl %esi, %ebx
+; X64-NEXT:    callq use@PLT
+; X64-NEXT:    movl %ebx, %eax
+; X64-NEXT:    popq %rbx
+; X64-NEXT:    retq
+  %neg = sub i32 0, %x
+  %xor = xor i32 %x, %neg
+  %and = and i32 %xor, %y
+  call void @use(i32 %neg)
+  ret i32 %and
+}
+
+; Negative test
+define i32 @fold_and_xor_neg_v1_32_no_blsmsk_negative(i32 %x, i32 %y, i32 %z) nounwind {
+; X86-LABEL: fold_and_xor_neg_v1_32_no_blsmsk_negative:
+; X86:       # %bb.0:
+; X86-NEXT:    xorl %eax, %eax
+; X86-NEXT:    subl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    xorl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: fold_and_xor_neg_v1_32_no_blsmsk_negative:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edx, %eax
+; X64-NEXT:    negl %eax
+; X64-NEXT:    xorl %edi, %eax
+; X64-NEXT:    andl %esi, %eax
+; X64-NEXT:    retq
+  %neg = sub i32 0, %z
+  %xor = xor i32 %x, %neg
+  %and = and i32 %xor, %y
+  ret i32 %and
+}
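
As a source-level illustration of the pattern the new tests exercise (a
sketch, not taken from the patch), the C++ below contains the
AND(Y, XOR(X, NEG(X))) shape; with BMI enabled and this fold in place, the
i32/i64 cases are expected to lower to a BLSMSK followed by an ANDN, as in
the CHECK lines above:

    // Hypothetical helper mirroring fold_and_xor_neg_v1_32 from the test file.
    unsigned andnot_blsmsk(unsigned X, unsigned Y) {
      unsigned Neg = 0u - X;   // SUB(0, X)
      unsigned Xor = X ^ Neg;  // XOR(X, NEG(X)) == ~BLSMSK(X)
      return Xor & Y;          // AND -> folded to ANDN(Y, BLSMSK(X)) on BMI targets
    }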

More information about the llvm-commits mailing list