[llvm] 2808743 - [X86] LowerVSETCC - always split 512-bit vectors before lowering to PCMPEQ/GT (PR53842)
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 15 06:21:25 PST 2022
Author: Simon Pilgrim
Date: 2022-02-15T14:21:12Z
New Revision: 2808743cbdcee806eecd7b8799cf776727dc574b
URL: https://github.com/llvm/llvm-project/commit/2808743cbdcee806eecd7b8799cf776727dc574b
DIFF: https://github.com/llvm/llvm-project/commit/2808743cbdcee806eecd7b8799cf776727dc574b.diff
LOG: [X86] LowerVSETCC - always split 512-bit vectors before lowering to PCMPEQ/GT (PR53842)
Extend the existing split, which we already perform for v32i16/v64i8, to all 512-bit integer vectors.
We can end up trying to use PCMPEQ/GT if the result needs to be sign-extended (typically due to the DAGCombiner::foldSextSetcc fold).
Fixes #53842
Added:
llvm/test/CodeGen/X86/pr53842.ll
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 3366b45615bab..4c622568f8d0d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -24009,10 +24009,10 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
- if (VT == MVT::v32i16 || VT == MVT::v64i8) {
- assert(!Subtarget.hasBWI() && "Unexpected VT with AVX512BW!");
+ // Break 512-bit integer vector compare into smaller ones.
+ // TODO: Try harder to use VPCMPx + VPMOV2x?
+ if (VT.is512BitVector())
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
- }
// If we have a limit constant, try to form PCMPGT (signed cmp) to avoid
// not-of-PCMPEQ:
diff --git a/llvm/test/CodeGen/X86/pr53842.ll b/llvm/test/CodeGen/X86/pr53842.ll
new file mode 100644
index 0000000000000..61b58341f2244
--- /dev/null
+++ b/llvm/test/CodeGen/X86/pr53842.ll
@@ -0,0 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512dq | FileCheck %s
+
+define void @PR53842() {
+; CHECK-LABEL: PR53842:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; CHECK-NEXT: vpxor %xmm1, %xmm1, %xmm1
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_1: # %vector.body
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: vpmovzxbq {{.*#+}} zmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero
+; CHECK-NEXT: vextracti64x4 $1, %zmm2, %ymm3
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm3, %ymm3
+; CHECK-NEXT: vpcmpeqq %ymm1, %ymm2, %ymm2
+; CHECK-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm2
+; CHECK-NEXT: vpsubq %zmm2, %zmm0, %zmm0
+; CHECK-NEXT: jmp .LBB0_1
+entry:
+ br label %vector.body
+
+vector.body:
+ %index = phi i64 [ 0, %entry ], [ 0, %vector.body ]
+ %vec.phi = phi <8 x i64> [ zeroinitializer, %entry ], [ %i2, %vector.body ]
+ %wide.load23 = load <8 x i8>, <8 x i8>* undef, align 1
+ %i = icmp eq <8 x i8> zeroinitializer, %wide.load23
+ %i1 = zext <8 x i1> %i to <8 x i64>
+ %i2 = add <8 x i64> %vec.phi, %i1
+ br i1 false, label %middle.block, label %vector.body
+
+middle.block:
+ %bin.rdx = add <8 x i64> undef, %i2
+ unreachable
+}
+
More information about the llvm-commits mailing list