[llvm] [X86] Invert (and X, ~(and ~Y, Z)) back into (and X, (or Y, ~Z)) (PR #109215)
Miguel Saldivar via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 11 10:46:56 PDT 2024
https://github.com/Saldivarcher updated https://github.com/llvm/llvm-project/pull/109215
From 9b66ebf04686f70ea7e7f01f913a1a98b9f5975e Mon Sep 17 00:00:00 2001
From: Miguel Saldivar <miguel.saldivar at hpe.com>
Date: Fri, 20 Sep 2024 00:55:09 -0500
Subject: [PATCH] [X86] Invert `(and X, ~(and ~Y, Z))` back into `(and X, (or Y, ~Z))`
The reason for this inversion is to make use of the `andn` instruction,
which in turn yields fewer instructions.
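For context, BMI1's `andn` computes `~a & b` in a single instruction, so the inverted form maps directly onto two chained `andn` ops. A minimal sketch of that decomposition (illustrative C++ only, not part of this patch; the helper names are made up):
```
#include <cstdint>

// BMI1 ANDN semantics: one instruction computing ~a & b.
static uint64_t andn(uint64_t a, uint64_t b) { return ~a & b; }

// (and X, ~(and ~Y, Z)) decomposes into two chained andn ops.
uint64_t inverted(uint64_t x, uint64_t y, uint64_t z) {
  uint64_t t = andn(y, z); // t = ~y & z
  return andn(t, x);       // ~t & x == x & ~(~y & z)
}
```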
This is the assembly we produced previously:
```
not rcx
and rsi, rdx
andn rax, rsi, rdi
or rcx, rdx
and rax, rcx
ret
```
The assembly with the inversion:
```
and rsi, rdx
andn rcx, rdx, rcx
andn rax, rsi, rdi
andn rax, rcx, rax
ret
```
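The inverted sequence is one instruction shorter and replaces the serial `not`/`or` pair with `andn`. The two forms agree by De Morgan's law, `~(~Y & Z) == (Y | ~Z)`; a quick exhaustive check over 8-bit operands (again illustrative C++, not part of the patch):
```
#include <cassert>
#include <cstdint>

// Exhaustively verify (x & (y | ~z)) == (x & ~(~y & z)) on 8-bit values.
int main() {
  for (unsigned x = 0; x < 256; ++x)
    for (unsigned y = 0; y < 256; ++y)
      for (unsigned z = 0; z < 256; ++z)
        assert(uint8_t(x & (y | ~z)) == uint8_t(x & ~(~y & z)));
  return 0;
}
```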
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 25 ++++++++++
llvm/test/CodeGen/X86/pr108731.ll | 61 +++++++++++++++++++++++++
2 files changed, 86 insertions(+)
create mode 100644 llvm/test/CodeGen/X86/pr108731.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 77c10baa31bd21..244b0e4379e2ad 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -49986,6 +49986,28 @@ static bool hasBZHI(const X86Subtarget &Subtarget, MVT VT) {
(VT == MVT::i32 || (VT == MVT::i64 && Subtarget.is64Bit()));
}
+/// Folds (and X, (or Y, ~Z)) --> (and X, ~(and ~Y, Z))
+/// This undoes the inverse fold performed in InstCombine
+static SDValue combineAndNotOrIntoAndNotAnd(SDNode *N, SelectionDAG &DAG) {
+
+ using namespace llvm::SDPatternMatch;
+ MVT VT = N->getSimpleValueType(0);
+ SDLoc DL(N);
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo();
+ if (!TLI.hasAndNot(SDValue(N, 0)))
+ return SDValue();
+
+ SDValue X, Y, Z;
+ if (sd_match(
+ N, m_And(m_Value(X), m_OneUse(m_Or(m_Value(Y), m_Not(m_Value(Z)))))))
+ return DAG.getNode(
+ ISD::AND, DL, VT, X,
+ DAG.getNOT(DL, DAG.getNode(ISD::AND, DL, VT, DAG.getNOT(DL, Y, VT), Z),
+ VT));
+
+ return SDValue();
+}
+
// This function recognizes cases where X86 bzhi instruction can replace and
// 'and-load' sequence.
// In case of loading integer value from an array of constants which is defined
@@ -50477,6 +50499,9 @@ static SDValue combineAnd(SDNode *N, SelectionDAG &DAG,
if (SDValue R = combineAndLoadToBZHI(N, DAG, Subtarget))
return R;
+ if (SDValue R = combineAndNotOrIntoAndNotAnd(N, DAG))
+ return R;
+
// fold (and (mul x, c1), c2) -> (mul x, (and c1, c2))
// iff c2 is all/no bits mask - i.e. a select-with-zero mask.
// TODO: Handle PMULDQ/PMULUDQ/VPMADDWD/VPMADDUBSW?
diff --git a/llvm/test/CodeGen/X86/pr108731.ll b/llvm/test/CodeGen/X86/pr108731.ll
new file mode 100644
index 00000000000000..1c6d2deb701afa
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr108731.ll
@@ -0,0 +1,61 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v2 | FileCheck %s --check-prefixes=CHECK,NOBMI
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v3 | FileCheck %s --check-prefixes=CHECK,BMI
+
+define i64 @foo(i64 %w, i64 %x, i64 %y, i64 %z) {
+; NOBMI-LABEL: foo:
+; NOBMI: # %bb.0: # %Entry
+; NOBMI-NEXT: movq %rcx, %rax
+; NOBMI-NEXT: andq %rdx, %rsi
+; NOBMI-NEXT: notq %rsi
+; NOBMI-NEXT: andq %rdi, %rsi
+; NOBMI-NEXT: notq %rax
+; NOBMI-NEXT: orq %rdx, %rax
+; NOBMI-NEXT: andq %rsi, %rax
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: foo:
+; BMI: # %bb.0: # %Entry
+; BMI-NEXT: andq %rdx, %rsi
+; BMI-NEXT: andnq %rdi, %rsi, %rax
+; BMI-NEXT: andnq %rcx, %rdx, %rcx
+; BMI-NEXT: andnq %rax, %rcx, %rax
+; BMI-NEXT: retq
+Entry:
+ %and1 = and i64 %y, %x
+ %xor1 = xor i64 %and1, -1
+ %and2 = and i64 %xor1, %w
+ %.not = xor i64 %z, -1
+ %or1 = or i64 %.not, %y
+ %and3 = and i64 %and2, %or1
+ ret i64 %and3
+}
+
+define <16 x i8> @fooVec(<16 x i8> %w, <16 x i8> %x, <16 x i8> %y, <16 x i8> %z) {
+; NOBMI-LABEL: fooVec:
+; NOBMI: # %bb.0: # %Entry
+; NOBMI-NEXT: andps %xmm2, %xmm1
+; NOBMI-NEXT: andnps %xmm0, %xmm1
+; NOBMI-NEXT: andnps %xmm3, %xmm2
+; NOBMI-NEXT: andnps %xmm1, %xmm2
+; NOBMI-NEXT: movaps %xmm2, %xmm0
+; NOBMI-NEXT: retq
+;
+; BMI-LABEL: fooVec:
+; BMI: # %bb.0: # %Entry
+; BMI-NEXT: vandps %xmm1, %xmm2, %xmm1
+; BMI-NEXT: vandnps %xmm0, %xmm1, %xmm0
+; BMI-NEXT: vandnps %xmm3, %xmm2, %xmm1
+; BMI-NEXT: vandnps %xmm0, %xmm1, %xmm0
+; BMI-NEXT: retq
+Entry:
+ %and1 = and <16 x i8> %y, %x
+ %xor1 = xor <16 x i8> %and1, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %and2 = and <16 x i8> %xor1, %w
+ %.not = xor <16 x i8> %z, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+ %or1 = or <16 x i8> %.not, %y
+ %and3 = and <16 x i8> %and2, %or1
+ ret <16 x i8> %and3
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
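Worth noting: the new combine is gated on `TargetLowering::hasAndNot`, and the test checks above reflect that. Without BMI the scalar case keeps the original `not`/`or` sequence, while the vector case still forms `andnps`, since X86 reports and-not support for vector types independent of BMI.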