[llvm] [X86, CodeGen] Add a pattern in PreprocessISelDAG. (PR #85848)

Tue Mar 19 12:05:26 PDT 2024

https://github.com/DataCorrupted created https://github.com/llvm/llvm-project/pull/85848

`%c = shl <X x i1> %a, %b -> %c = %a`
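As discussed in #85681, this pattern is added to avoid a "cannot select" error during instruction selection. Since a shift of an `i1` value either leaves it unchanged (shift amount 0) or is undefined (shift amount 1, which is not smaller than the bit width and therefore yields poison), we treat the shift as the identity and replace all uses of %c with %a.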

Tests for shl, ashr, lshr are added.
This fixes #85681.

>From e8dda973740b8ce038489101aa12fda9abe8673e Mon Sep 17 00:00:00 2001
From: Peter Rong <PeterRong96 at gmail.com>
Date: Tue, 19 Mar 2024 19:01:44 +0000
Subject: [PATCH] [X86] Add a pattern in PreprocessISelDAG.

`%c = shl <X x i1> %a, %b -> %c = %a`
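As discussed in #85681, this pattern is added to avoid a "cannot select" error during instruction selection.
Since a shift of an `i1` value either leaves it unchanged (shift amount 0) or is undefined (shift amount 1, which is not smaller than the bit width and therefore yields poison), we treat the shift as the identity and replace all uses of %c with %a.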

Tests for shl, ashr, lshr are added.
This fixes #85681.

Signed-off-by: Peter Rong <PeterRong96 at gmail.com>
---
 llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 11 ++++++-
 llvm/test/CodeGen/X86/pr85681.ll        | 39 +++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 llvm/test/CodeGen/X86/pr85681.ll

diff --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index 4e4241efd63d6b..824a86225b4dd6 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -1151,10 +1151,19 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
     case ISD::SHL:
     case ISD::SRA:
     case ISD::SRL: {
+      auto Ty = N->getValueType(0);
       // Replace vector shifts with their X86 specific equivalent so we don't
       // need 2 sets of patterns.
-      if (!N->getValueType(0).isVector())
+      if (!Ty.isVector())
+        break;
+
+      // Per discussion in #85681, `%c = shl <X x i1> %a, %b -> %c = %a`,
+      // aka, replace all `%c` with `%a` and erase this instruction.
+      if (Ty.getVectorElementType().getFixedSizeInBits() == 1) {
+        CurDAG->ReplaceAllUsesOfValueWith(SDValue(N, 0), N->getOperand(0));
+        CurDAG->DeleteNode(N);
         break;
+      }
 
       unsigned NewOpc;
       switch (N->getOpcode()) {
diff --git a/llvm/test/CodeGen/X86/pr85681.ll b/llvm/test/CodeGen/X86/pr85681.ll
new file mode 100644
index 00000000000000..8958d60cc561ea
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr85681.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=emeraldrapids | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=sapphirerapids | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=znver4 | FileCheck %s
+
+define i32 @shl(i32 %a0) {
+; CHECK-LABEL: shl:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    kxnord %k0, %k0, %k0
+; CHECK-NEXT:    kmovd %k0, %eax
+; CHECK-NEXT:    retq
+  %v0 = bitcast i32 %a0 to <32 x i1>
+  %s = shl <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, %v0
+  %r = bitcast <32 x i1> %s to i32
+  ret i32 %r
+}
+
+define i32 @lshr(i32 %a0) {
+; CHECK-LABEL: lshr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    retq
+  %v0 = bitcast i32 %a0 to <32 x i1>
+  %s = lshr <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, %v0
+  %r = bitcast <32 x i1> %s to i32
+  ret i32 %r
+}
+
+
+define i32 @ashr(i32 %a0) {
+; CHECK-LABEL: ashr:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movl $-1, %eax
+; CHECK-NEXT:    retq
+  %v0 = bitcast i32 %a0 to <32 x i1>
+  %s = ashr <32 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, %v0
+  %r = bitcast <32 x i1> %s to i32
+  ret i32 %r
+}