[llvm] [X86] Only call combineBitcastToBoolVector after legalization (PR #123386)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 17 10:30:39 PST 2025
https://github.com/RKSimon created https://github.com/llvm/llvm-project/pull/123386
Prevents an infinite loop between combineBitcastToBoolVector and hoistLogicOpWithSameOpcodeHands, which only performs the "logicop(bitcast(A),bitcast(B)) -> bitcast(logicop(A,B))" fold up to type legalization.
combineBitcastToBoolVector doesn't care much, as it's mainly for AVX512 cleanup that X86DomainReassignment can't handle for us.
Fixes #123333
>From f3704c2b0755f0d8ecb85e188861e8c6ec263d0f Mon Sep 17 00:00:00 2001
From: Simon Pilgrim <llvm-dev at redking.me.uk>
Date: Fri, 17 Jan 2025 18:28:44 +0000
Subject: [PATCH] [X86] Only call combineBitcastToBoolVector after legalization
Prevents an infinite loop between combineBitcastToBoolVector and hoistLogicOpWithSameOpcodeHands, which only performs the "logicop(bitcast(A),bitcast(B)) -> bitcast(logicop(A,B))" fold up to type legalization.
Fixes #123333
---
llvm/lib/Target/X86/X86ISelLowering.cpp | 2 +-
llvm/test/CodeGen/X86/pr123333.ll | 34 +++++++++++++++++++++++++
2 files changed, 35 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/X86/pr123333.ll
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 84736f18011a9d..b74a2d7d2e8480 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -45053,7 +45053,7 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
N0 = DAG.getBitcast(MVT::i8, N0);
return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
}
- } else {
+ } else if (DCI.isAfterLegalizeDAG()) {
// If we're bitcasting from iX to vXi1, see if the integer originally
// began as a vXi1 and whether we can remove the bitcast entirely.
if (VT.isVector() && VT.getScalarType() == MVT::i1 &&
diff --git a/llvm/test/CodeGen/X86/pr123333.ll b/llvm/test/CodeGen/X86/pr123333.ll
new file mode 100644
index 00000000000000..3d2ac257f3b9a6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr123333.ll
@@ -0,0 +1,34 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=x86_64-- -mcpu=x86-64-v4 | FileCheck %s
+
+define void @PR123333(ptr %in0, ptr %in1, ptr %in2, ptr %out0, ptr %out1) {
+; CHECK-LABEL: PR123333:
+; CHECK: # %bb.0:
+; CHECK-NEXT: kmovq (%rsi), %k0
+; CHECK-NEXT: kmovq (%rdi), %k1
+; CHECK-NEXT: korq %k1, %k0, %k2
+; CHECK-NEXT: vmovdqu8 (%rdx), %zmm0 {%k2} {z}
+; CHECK-NEXT: vmovdqu64 %zmm0, (%rcx)
+; CHECK-NEXT: kandnq %k0, %k1, %k0
+; CHECK-NEXT: knotq %k0, %k1
+; CHECK-NEXT: vmovdqu8 {{.*#+}} zmm0 {%k1} {z} = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
+; CHECK-NEXT: vmovdqu64 %zmm0, (%r8)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %i0 = load i64, ptr %in0, align 8
+ %i1 = load i64, ptr %in1, align 8
+ %i2 = load <64 x i8>, ptr %in2, align 1
+ %i7 = bitcast i64 %i1 to <64 x i1>
+ %i9 = bitcast i64 %i0 to <64 x i1>
+ %i12 = or i64 %i1, %i0
+ %i13 = bitcast i64 %i12 to <64 x i1>
+ %i15 = or <64 x i1> %i13, %i9
+ %i17 = select <64 x i1> %i15, <64 x i8> %i2, <64 x i8> zeroinitializer
+ store <64 x i8> %i17, ptr %out0, align 16
+ %i21 = icmp eq <64 x i8> %i2, splat (i8 4)
+ %i22 = xor <64 x i1> %i9, splat (i1 true)
+ %i23 = and <64 x i1> %i22, %i7
+ %i25 = select <64 x i1> %i23, <64 x i8> zeroinitializer, <64 x i8> splat (i8 15)
+ store <64 x i8> %i25, ptr %out1, align 16
+ ret void
+}
More information about the llvm-commits
mailing list