[llvm] [X86] Invert (and X, (or Y, ~Z)) back into (and X, ~(and ~Y, Z)) (PR #109215)

Miguel Saldivar via llvm-commits llvm-commits at lists.llvm.org
Fri Oct 11 10:46:56 PDT 2024


https://github.com/Saldivarcher updated https://github.com/llvm/llvm-project/pull/109215

From 9b66ebf04686f70ea7e7f01f913a1a98b9f5975e Mon Sep 17 00:00:00 2001
From: Miguel Saldivar <miguel.saldivar at hpe.com>
Date: Fri, 20 Sep 2024 00:55:09 -0500
Subject: [PATCH] [X86] Invert `(and X, (or Y, ~Z))` back into `(and X,
 ~(and ~Y, Z))`

The reason for this inversion is to make use of the BMI `andn` instruction
(`andn dst, src1, src2` computes `dst = ~src1 & src2`), which yields fewer
instructions: `~(and ~Y, Z)` maps directly onto two `andn` ops, whereas
`(or Y, ~Z)` requires a separate `not` and `or`.

This is the assembly we produced previously (with BMI enabled):
```
        not     rcx
        and     rsi, rdx
        andn    rax, rsi, rdi
        or      rcx, rdx
        and     rax, rcx
        ret
```

The assembly with the inversion:

```
        and     rsi, rdx
        andn    rcx, rdx, rcx
        andn    rax, rsi, rdi
        andn    rax, rcx, rax
        ret
```
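
For reference, here is a hypothetical C equivalent of the expression being
compiled (it mirrors the `foo` test case added below; the function and
parameter names are illustrative, not part of the patch):

```
#include <stdint.h>

/* Computes (w & ~(x & y)) & (y | ~z). With the fold, the (y | ~z) term
   is rebuilt as ~(~y & z), so the expression lowers to one `and` plus
   three `andn`s, as in the second listing above. */
uint64_t foo(uint64_t w, uint64_t x, uint64_t y, uint64_t z) {
  return (w & ~(x & y)) & (y | ~z);
}
```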
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 25 ++++++++++
 llvm/test/CodeGen/X86/pr108731.ll       | 61 +++++++++++++++++++++++++
 2 files changed, 86 insertions(+)
 create mode 100644 llvm/test/CodeGen/X86/pr108731.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 77c10baa31bd21..244b0e4379e2ad 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49986,6 +49986,28 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
          (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
 }
 
+/// Folds (and X, (or Y, ~Z)) --> (and X, ~(and ~Y, Z))
+/// This undoes the inverse fold performed in InstCombine
+static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG) {
+
+  using namespace llvm::SDPatternMatch;
+  MVT VT = N->getSimpleValueType(0);
+  SDLoc DL(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.hasAndNot(SDValue(N, 0)))
+    return SDValue();
+
+  SDValue X, Y, Z;
+  if (sd_match(
+          N, m_And(m_Value(X), m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
+    return DAG.getNode(
+        ISD::AND, DL, VT, X,
+        DAG.getNOT(DL, DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z),
+                   VT));
+
+  return SDValue();
+}
+
 // This function recognizes cases where X86 bzhi instruction can replace and
 // 'and-load' sequence.
 // In case of loading integer value from an array of constants which is defined
@@ -50477,6 +50499,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG))
+    return R;
+
   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.
   // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
new file mode 100644
index 00000000000000..1c6d2deb701afa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,BMI
+
+define i64 @foo(i64 %w, i64 %x, i64 %y, i64 %z) {
+; NOBMI-LABEL: foo:
+; NOBMI:       # %bb.0: # %Entry
+; NOBMI-NEXT:    movq %rcx, %rax
+; NOBMI-NEXT:    andq %rdx, %rsi
+; NOBMI-NEXT:    notq %rsi
+; NOBMI-NEXT:    andq %rdi, %rsi
+; NOBMI-NEXT:    notq %rax
+; NOBMI-NEXT:    orq %rdx, %rax
+; NOBMI-NEXT:    andq %rsi, %rax
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: foo:
+; BMI:       # %bb.0: # %Entry
+; BMI-NEXT:    andq %rdx, %rsi
+; BMI-NEXT:    andnq %rdi, %rsi, %rax
+; BMI-NEXT:    andnq %rcx, %rdx, %rcx
+; BMI-NEXT:    andnq %rax, %rcx, %rax
+; BMI-NEXT:    retq
+Entry:
+  %and1 = and i64 %y, %x
+  %xor1 = xor i64 %and1, -1
+  %and2 = and i64 %xor1, %w
+  %.not = xor i64 %z, -1
+  %or1 = or i64 %.not, %y
+  %and3 = and i64 %and2, %or1
+  ret i64 %and3
+}
+
+define <16 x i8> @fooVec(<16 x i8> %w, <16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
+; NOBMI-LABEL: fooVec:
+; NOBMI:       # %bb.0: # %Entry
+; NOBMI-NEXT:    andps %xmm2, %xmm1
+; NOBMI-NEXT:    andnps %xmm0, %xmm1
+; NOBMI-NEXT:    andnps %xmm3, %xmm2
+; NOBMI-NEXT:    andnps %xmm1, %xmm2
+; NOBMI-NEXT:    movaps %xmm2, %xmm0
+; NOBMI-NEXT:    retq
+;
+; BMI-LABEL: fooVec:
+; BMI:       # %bb.0: # %Entry
+; BMI-NEXT:    vandps %xmm1, %xmm2, %xmm1
+; BMI-NEXT:    vandnps %xmm0, %xmm1, %xmm0
+; BMI-NEXT:    vandnps %xmm3, %xmm2, %xmm1
+; BMI-NEXT:    vandnps %xmm0, %xmm1, %xmm0
+; BMI-NEXT:    retq
+Entry:
+  %and1 = and <16 x i8> %y, %x
+  %xor1 = xor <16 x i8> %and1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %and2 = and <16 x i8> %xor1, %w
+  %.not = xor <16 x i8> %z, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %or1 = or <16 x i8> %.not, %y
+  %and3 = and <16 x i8> %and2, %or1
+  ret <16 x i8> %and3
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
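
As a sanity check on the rewrite itself (an editorial sketch, not part of the
patch): `X & (Y | ~Z) == X & ~(~Y & Z)` is De Morgan's law applied under the
outer `and`. A minimal brute-force verification over i8, modeling the BMI
`andn a, b` operation as `~a & b`:

```
#include <assert.h>
#include <stdint.h>

/* Model of the BMI andn instruction: andn(a, b) == ~a & b. */
static uint8_t andn(uint8_t a, uint8_t b) { return (uint8_t)(~a & b); }

int main(void) {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned z = 0; z < 256; ++z) {
        uint8_t or_form = (uint8_t)(x & (y | ~z));
        /* Folded form X & ~(~Y & Z), i.e. andn(andn(Y, Z), X). */
        uint8_t andn_form = andn(andn((uint8_t)y, (uint8_t)z), (uint8_t)x);
        assert(or_form == andn_form);
      }
  return 0;
}
```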


