[llvm] [X86] Invert (and X, ~(and ~Y, Z)) back into (and X, (or Y, ~Z)) (PR #109215)
Miguel Saldivar via llvm-commits
llvm-commits at lists.llvm.org
Fri Sep 20 09:32:50 PDT 2024
https://github.com/Saldivarcher updated https://github.com/llvm/llvm-project/pull/109215
>From 71fe983760126eac887d5309200733aa5beebd1c Mon Sep 17 00:00:00 2001
From: Miguel Saldivar <miguel.saldivar at hpe.com>
Date: Fri, 20 Sep 2024 00:55:09 -0500
Subject: [PATCH] [X86] Invert `(and X, ~(and ~Y, Z))` back into `(and X, (or
Y, ~Z))`
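InstCombine canonicalizes `(and X, ~(and ~Y, Z))` into `(and X, (or Y, ~Z))`.
This patch undoes that canonicalization in the X86 backend so that the `andn`
instruction can be used, which in turn produces fewer instructions.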
This is the assembly we produced previously:
```
not rcx
and rsi, rdx
andn rax, rsi, rdi
or rcx, rdx
and rax, rcx
ret
```
The assembly with the inversion:
```
and rsi, rdx
andn rcx, rdx, rcx
andn rax, rsi, rdi
andn rax, rcx, rax
ret
```
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 29 ++++++++++++++++++
llvm/test/CodeGen/X86/avx512vl-logic.ll | 2 +-
llvm/test/CodeGen/X86/pr108731.ll | 40 +++++++++++++++++++++++++
3 files changed, 70 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/X86/pr108731.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index c5dc3ea17f72c3..ed3b5c551bd69d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -50034,6 +50034,32 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
}
+/// InstCombine canonicalizes:
+///   `(and X, ~(and ~Y, Z))`
+/// to
+///   `(and X, (or Y, ~Z))`
+///
+/// Undo that canonicalization when the target reports an and-not instruction
+/// for this node, rebuilding `(and X, ~(and ~Y, Z))`.
+static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG,
+                                            const X86Subtarget &Subtarget) {
+  using namespace llvm::SDPatternMatch;
+  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+  if (!TLI.hasAndNot(SDValue(N, 0)))
+    return SDValue();
+
+  SDValue X, Y, Z;
+  if (!sd_match(N, m_And(m_Value(X),
+                         m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
+    return SDValue();
+
+  // EVT rather than MVT: getSimpleValueType() asserts on non-simple types.
+  EVT VT = N->getValueType(0);
+  SDLoc DL(N);
+  SDValue AndNotYZ = DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z);
+  return DAG.getNode(ISD::AND, DL, VT, X, DAG.getNOT(DL, AndNotYZ, VT));
+}
+
// This function recognizes cases where X86 bzhi instruction can replace and
// 'and-load' sequence.
// In case of loading integer value from an array of constants which is defined
@@ -50531,6 +50557,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
return R;
+ if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG, Subtarget))
+ return R;
+
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/avx512vl-logic.ll b/llvm/test/CodeGen/X86/avx512vl-logic.ll
index 58621967e2aca6..ea9d785ed88ac2 100644
--- a/llvm/test/CodeGen/X86/avx512vl-logic.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-logic.ll
@@ -980,7 +980,7 @@ define <4 x i32> @ternlog_or_andn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
define <4 x i32> @ternlog_and_orn(<4 x i32> %x, <4 x i32> %y, <4 x i32> %z) {
; CHECK-LABEL: ternlog_and_orn:
; CHECK: ## %bb.0:
-; CHECK-NEXT: vpternlogd $176, %xmm1, %xmm2, %xmm0
+; CHECK-NEXT: vpternlogd $208, %xmm2, %xmm1, %xmm0
; CHECK-NEXT: retq
%a = xor <4 x i32> %z, <i32 -1, i32 -1, i32 -1, i32 -1>
%b = or <4 x i32> %a, %y
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
new file mode 100644
index 00000000000000..6022b068536935
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -0,0 +1,40 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-gnu-unknown -mcpu=znver3 | FileCheck %s
+
+define dso_local i64 @foo(i64 %0, i64 %1, i64 %2, i64 %3) local_unnamed_addr {
+; CHECK-LABEL: foo:
+; CHECK: # %bb.0: # %Entry
+; CHECK-NEXT: andq %rdx, %rsi
+; CHECK-NEXT: andnq %rcx, %rdx, %rcx
+; CHECK-NEXT: andnq %rdi, %rsi, %rax
+; CHECK-NEXT: andnq %rax, %rcx, %rax
+; CHECK-NEXT: retq
+Entry:
+ %4 = and i64 %2, %1
+ %5 = xor i64 %4, -1
+ %6 = and i64 %5, %0
+ %.not = xor i64 %3, -1
+ %7 = or i64 %.not, %2
+ %8 = and i64 %6, %7
+ ret i64 %8
+}
+
+declare void @llvm.dbg.value(metadata, metadata, metadata) #1
+
+define dso_local <16 x i8> @fooVec(<16 x i8> %0, <16 x i8> %1, <16 x i8> %2, <16 x i8> %3) local_unnamed_addr {
+; CHECK-LABEL: fooVec:
+; CHECK: # %bb.0: # %Entry
+; CHECK-NEXT: vandps %xmm1, %xmm2, %xmm1
+; CHECK-NEXT: vandnps %xmm3, %xmm2, %xmm2
+; CHECK-NEXT: vandnps %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vandnps %xmm0, %xmm2, %xmm0
+; CHECK-NEXT: retq
+Entry:
+ %4 = and <16 x i8> %2, %1
+ %5 = xor <16 x i8> %4, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %6 = and <16 x i8> %5, %0
+ %.not = xor <16 x i8> %3, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %7 = or <16 x i8> %.not, %2
+ %8 = and <16 x i8> %6, %7
+ ret <16 x i8> %8
+}
More information about the llvm-commits
mailing list