[llvm] 6fd229a - [X86] Invert (and X, ~(and ~Y, Z)) back into (and X, (or Y, ~Z)) (#109215)

via llvm-commits <llvm-commits at lists.llvm.org>
Sat Oct 12 03:28:42 PDT 2024


Author: Miguel Saldivar
Date: 2024-10-12T11:28:39+01:00
New Revision: 6fd229a655f521a9f58d40c671e5cab4ea3ea87b

URL: https://github.com/llvm/llvm-project/commit/6fd229a655f521a9f58d40c671e5cab4ea3ea87b
DIFF: https://github.com/llvm/llvm-project/commit/6fd229a655f521a9f58d40c671e5cab4ea3ea87b.diff

LOG: [X86] Invert (and X, ~(and ~Y, Z)) back into (and X, (or Y, ~Z)) (#109215)

When `andn` is available, we should avoid switching `s &= ~(z & ~y);` into `s &= ~z | y;`

This patch turns this assembly from:
```
foo:
        not     rcx
        and     rsi, rdx
        andn    rax, rsi, rdi
        or      rcx, rdx
        and     rax, rcx
        ret
```
into:
```
foo:
        and     rsi, rdx
        andn    rcx, rdx, rcx
        andn    rax, rsi, rdi
        andn    rax, rcx, rax
        ret
```
Fixes #108731

Added: 
    llvm/test/CodeGen/X86/pr108731.ll

Modified: 
    llvm/lib/Target/X86/X86ISelLowering.cpp

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index de88db22279797..e57ca7a31dce2a 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50002,6 +50002,28 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
          (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
 }
 
+/// Folds (and X, (or Y, ~Z)) --> (and X, ~(and ~Y, Z))
+/// This undoes the inverse fold performed in InstCombine
+static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG) {
+
+  using namespace llvm::SDPatternMatch;
+  MVT VT = N->getSimpleValueType(0);
+  SDLoc DL(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.hasAndNot(SDValue(N, 0)))
+    return SDValue();
+
+  SDValue X, Y, Z;
+  if (sd_match(
+          N, m_And(m_Value(X), m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
+    return DAG.getNode(
+        ISD::AND, DL, VT, X,
+        DAG.getNOT(DL, DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z),
+                   VT));
+
+  return SDValue();
+}
+
 // This function recognizes cases where X86 bzhi instruction can replace and
 // 'and-load' sequence.
 // In case of loading integer value from an array of constants which is defined
@@ -50493,6 +50515,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG))
+    return R;
+
   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.
   // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?

diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
new file mode 100644
index 00000000000000..1c6d2deb701afa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,BMI
+
+define i64 @foo(i64 %w, i64 %x, i64 %y, i64 %z) {
+; NOBMI-LABEL: foo:
+; NOBMI:       # %bb.0: # %Entry
+; NOBMI-NEXT:    movq %rcx, %rax
+; NOBMI-NEXT:    andq %rdx, %rsi
+; NOBMI-NEXT:    notq %rsi
+; NOBMI-NEXT:    andq %rdi, %rsi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    orq %rdx, %rax
+; NOBMI-NEXT:    andq %rsi, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: foo:
+; BMI:       # %bb.0: # %Entry
+; BMI-NEXT:    andq %rdx, %rsi
+; BMI-NEXT:    andnq %rdi, %rsi, %rax
+; BMI-NEXT:    andnq %rcx, %rdx, %rcx
+; BMI-NEXT:    andnq %rax, %rcx, %rax
+; BMI-NEXT:    retq
+Entry:
+  %and1 = and i64 %y, %x
+  %xor1 = xor i64 %and1, -1
+  %and2 = and i64 %xor1, %w
+  %.not = xor i64 %z, -1
+  %or1 = or i64 %.not, %y
+  %and3 = and i64 %and2, %or1
+  ret i64 %and3
+}
+
+define <16 x i8> @fooVec(<16 x i8> %w, <16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
+; NOBMI-LABEL: fooVec:
+; NOBMI:       # %bb.0: # %Entry
+; NOBMI-NEXT:    andps %xmm2, %xmm1
+; NOBMI-NEXT:    andnps %xmm0, %xmm1
+; NOBMI-NEXT:    andnps %xmm3, %xmm2
+; NOBMI-NEXT:    andnps %xmm1, %xmm2
+; NOBMI-NEXT:    movaps %xmm2, %xmm0
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: fooVec:
+; BMI:       # %bb.0: # %Entry
+; BMI-NEXT:    vandps %xmm1, %xmm2, %xmm1
+; BMI-NEXT:    vandnps %xmm0, %xmm1, %xmm0
+; BMI-NEXT:    vandnps %xmm3, %xmm2, %xmm1
+; BMI-NEXT:    vandnps %xmm0, %xmm1, %xmm0
+; BMI-NEXT:    retq
+Entry:
+  %and1 = and <16 x i8> %y, %x
+  %xor1 = xor <16 x i8> %and1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %and2 = and <16 x i8> %xor1, %w
+  %.not = xor <16 x i8> %z, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %or1 = or <16 x i8> %.not, %y
+  %and3 = and <16 x i8> %and2, %or1
+  ret <16 x i8> %and3
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}


        


More information about the llvm-commits mailing list