[llvm] [InstCombine] Avoid DeMorgan's on occasion (PR #109215)

Miguel Saldivar via llvm-commits llvm-commits at lists.llvm.org
Thu Sep 19 22:59:31 PDT 2024


https://github.com/Saldivarcher updated https://github.com/llvm/llvm-project/pull/109215

From a28360790fb99852578a15b408e7f312daecb100 Mon Sep 17 00:00:00 2001
From: Miguel Saldivar <miguel.saldivar at hpe.com>
Date: Fri, 20 Sep 2024 00:55:09 -0500
Subject: [PATCH] [X86] Invert `(and X, (or Y, ~Z))` back into `(and X,
 ~(and ~Y, Z))`

The reason for this inversion is to make use of the `andn` instruction,
which in turn lets the backend emit fewer instructions.

This is the assembly we produced previously:
```
        not     rcx
        and     rsi, rdx
        andn    rax, rsi, rdi
        or      rcx, rdx
        and     rax, rcx
        ret
```

The assembly with the inversion:

```
        and     rsi, rdx
        andn    rcx, rdx, rcx
        andn    rax, rsi, rdi
        andn    rax, rcx, rax
        ret
```
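
The two forms compute the same value by De Morgan's law: `~(~Y & Z)` is exactly
`Y | ~Z`. As a sanity check (a minimal standalone C++ sketch, not part of the
patch), the identity can be verified exhaustively on bytes:

```
#include <cassert>

int main() {
  // Exhaustively check X & ~(~Y & Z) == X & (Y | ~Z) over all byte values.
  for (int X = 0; X < 256; ++X)
    for (int Y = 0; Y < 256; ++Y)
      for (int Z = 0; Z < 256; ++Z)
        assert(((X & ~(~Y & Z)) & 0xFF) == ((X & (Y | ~Z)) & 0xFF));
  return 0;
}
```

The rewrite is therefore purely about instruction selection: the
`~(and ~Y, Z)` form feeds `andn` directly, while the `(or Y, ~Z)` form forces
a separate `not` and `or`, as seen in the first listing above.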
---
 llvm/lib/Target/X86/X86ISelLowering.cpp | 29 ++++++++++++++++++
 llvm/test/CodeGen/X86/avx512vl-logic.ll |  2 +-
 llvm/test/CodeGen/X86/pr108731.ll       | 40 +++++++++++++++++++++++++
 3 files changed, 70 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr108731.ll

diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c91d37727b6117..36af69fc694409 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49633,6 +49633,32 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
          (VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
 }
 
+/// InstCombine converts:
+///    `(and X, ~(and ~Y, Z))`
+/// to
+///    `(and X, (or Y, ~Z))`
+///
+/// But we should undo this transformation if the `andn` instruction is
+/// available to us.
+static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG,
+                                            const X86Subtarget &Subtarget) {
+
+  using namespace llvm::SDPatternMatch;
+  MVT VT = N->getSimpleValueType(0);
+  SDLoc DL(N);
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (TLI.hasAndNot(SDValue(N, 0))) {
+    SDValue X, Y, Z;
+    if (sd_match(N, m_And(m_Value(X),
+                          m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
+      return DAG.getNode(
+          ISD::AND, DL, VT, X,
+          DAG.getNOT(
+              DL, DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z), VT));
+  }
+  return SDValue();
+}
+
 // This function recognizes cases where X86 bzhi instruction can replace and
 // 'and-load' sequence.
 // In case of loading integer value from an array of constants which is defined
@@ -50130,6 +50156,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
   if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
     return R;
 
+  if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG, Subtarget))
+    return R;
+
   // fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
   // iff c2 is all/no bits mask - i.e. a select-with-zero mask.
   // TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll
index 58621967e2aca6..ea9d785ed88ac2 100644
--- a/llvm/test/CodeGen/X86/avx512vl-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll
@@ -980,7 +980,7 @@ define <4 x i32> @ternlog_or_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 define <4 x i32> @ternlog_and_orn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
 ; CHECK-LABEL: ternlog_and_orn:
 ; CHECK:       ## %bb.0:
-; CHECK-NEXT:    vpternlogd $176, %xmm1, %xmm2, %xmm0
+; CHECK-NEXT:    vpternlogd $208, %xmm2, %xmm1, %xmm0
 ; CHECK-NEXT:    retq
   %a = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
   %b = or <4 x i32> %a, %y
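
The only change in this existing test is the `vpternlogd` immediate: the new
combine hands instruction selection the `~(and ~Y, Z)` form, so the ternlog
operand assignment changes and the truth-table immediate is recomputed for the
same function `x & (y | ~z)`. A hypothetical helper (not LLVM code; it assumes
the standard vpternlog encoding in which bit `(A<<2)|(B<<1)|C` of the immediate
holds `f(A, B, C)`, with A the destination and B, C the two sources) reproduces
both immediates:

```
#include <cstdio>

// Build a vpternlog imm8 from a 3-input boolean function (illustrative only).
template <typename F> int TernlogImm(F f) {
  int Imm = 0;
  for (int A = 0; A <= 1; ++A)
    for (int B = 0; B <= 1; ++B)
      for (int C = 0; C <= 1; ++C)
        if (f(A, B, C))
          Imm |= 1 << ((A << 2) | (B << 1) | C);
  return Imm;
}

int main() {
  // Old operand order (A = x, B = z, C = y), computing x & (y | ~z): prints 176.
  printf("%d\n", TernlogImm([](int X, int Z, int Y) { return X & (Y | (Z ^ 1)); }));
  // New operand order (A = x, B = y, C = z), same function: prints 208.
  printf("%d\n", TernlogImm([](int X, int Y, int Z) { return X & (Y | (Z ^ 1)); }));
  return 0;
}
```

Both immediates describe the same boolean function, so the test update is benign.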
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
new file mode 100644
index 00000000000000..6022b068536935
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-gnu-unknown -mcpu=znver3 | FileCheck %s
+
+define dso_local i64 @foo(i64 %0, i64 %1, i64 %2, i64 %3) local_unnamed_addr {
+; CHECK-LABEL: foo:
+; CHECK:       # %bb.0: # %Entry
+; CHECK-NEXT:    andq %rdx, %rsi
+; CHECK-NEXT:    andnq %rcx, %rdx, %rcx
+; CHECK-NEXT:    andnq %rdi, %rsi, %rax
+; CHECK-NEXT:    andnq %rax, %rcx, %rax
+; CHECK-NEXT:    retq
+Entry:
+  %4 = and i64 %2, %1
+  %5 = xor i64 %4, -1
+  %6 = and i64 %5, %0
+  %.not = xor i64 %3, -1
+  %7 = or i64 %.not, %2
+  %8 = and i64 %6, %7
+  ret i64 %8
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+define dso_local <16 x i8> @fooVec(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3) local_unnamed_addr {
+; CHECK-LABEL: fooVec:
+; CHECK:       # %bb.0: # %Entry
+; CHECK-NEXT:    vandps %xmm1, %xmm2, %xmm1
+; CHECK-NEXT:    vandnps %xmm3, %xmm2, %xmm2
+; CHECK-NEXT:    vandnps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT:    vandnps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT:    retq
+Entry:
+  %4 = and <16 x i8> %2, %1
+  %5 = xor <16 x i8> %4, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %6 = and <16 x i8> %5, %0
+  %.not = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+  %7 = or <16 x i8> %.not, %2
+  %8 = and <16 x i8> %6, %7
+  ret <16 x i8> %8
+}
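
For reference, a hypothetical C++ source corresponding to `@foo` above (an
assumption for illustration, not taken from the PR); built for a BMI-capable
target, e.g. `clang -O2 -march=znver3`, it should lower to the `andn` chain in
the CHECK lines, though the exact IR clang emits may differ slightly:

```
#include <cstdint>

// a & ~(c & b), further masked by (~d | c); the new combine turns the second
// mask back into ~(~c & d) so both inverted masks can use BMI's andn.
uint64_t foo(uint64_t a, uint64_t b, uint64_t c, uint64_t d) {
  return (a & ~(c & b)) & (~d | c);
}
```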


