[llvm] ada0356 - [X86] Extend all_of(icmp_eq()) / any_of(icmp_ne()) -> scalar integer fold to AVX512 targets
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Wed Mar 22 08:56:44 PDT 2023
Author: Simon Pilgrim
Date: 2023-03-22T15:56:23Z
New Revision: ada03565261ab6ef1c5bca217767fe7f69d19a99
URL: https://github.com/llvm/llvm-project/commit/ada03565261ab6ef1c5bca217767fe7f69d19a99
DIFF: https://github.com/llvm/llvm-project/commit/ada03565261ab6ef1c5bca217767fe7f69d19a99.diff
LOG: [X86] Extend all_of(icmp_eq()) / any_of(icmp_ne()) -> scalar integer fold to AVX512 targets
Extends commit 1bb95a3a99cb44f2b8b801e5137d3ac529253f3b so the same combine also fires on AVX512 targets, where the vXi1 predicate type is legal
Continues the work on addressing Issue #53419
Added:
Modified:
llvm/lib/Target/X86/X86ISelLowering.cpp
llvm/test/CodeGen/X86/pr53419.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index a491ba84bf70..6cf359d6d217 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -44646,6 +44646,23 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
// Special case for (pre-legalization) vXi1 reductions.
if (NumElts > 64 || !isPowerOf2_32(NumElts))
return SDValue();
+ if (Match.getOpcode() == ISD::SETCC) {
+ ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();
+ if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) ||
+ (BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) {
+ // If representable as a scalar integer:
+ // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y.
+ // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y.
+ EVT VecVT = Match.getOperand(0).getValueType();
+ EVT IntVT = EVT::getIntegerVT(Ctx, VecVT.getSizeInBits());
+ if (TLI.isTypeLegal(IntVT)) {
+ SDValue LHS = DAG.getFreeze(Match.getOperand(0));
+ SDValue RHS = DAG.getFreeze(Match.getOperand(1));
+ return DAG.getSetCC(DL, ExtractVT, DAG.getBitcast(IntVT, LHS),
+ DAG.getBitcast(IntVT, RHS), CC);
+ }
+ }
+ }
if (TLI.isTypeLegal(MatchVT)) {
// If this is a legal AVX512 predicate type then we can just bitcast.
EVT MovmskVT = EVT::getIntegerVT(Ctx, NumElts);
@@ -44657,20 +44674,7 @@ static SDValue combinePredicateReduction(SDNode *Extract, SelectionDAG &DAG,
ISD::CondCode CC = cast<CondCodeSDNode>(Match.getOperand(2))->get();
if ((BinOp == ISD::AND && CC == ISD::CondCode::SETEQ) ||
(BinOp == ISD::OR && CC == ISD::CondCode::SETNE)) {
- EVT VecVT = Match.getOperand(0).getValueType();
-
- // If representable as a scalar integer:
- // For all_of(setcc(x,y,eq)) - use (iX)x == (iX)y.
- // For any_of(setcc(x,y,ne)) - use (iX)x != (iX)y.
- EVT IntVT = EVT::getIntegerVT(Ctx, VecVT.getSizeInBits());
- if (TLI.isTypeLegal(IntVT)) {
- SDValue LHS = DAG.getFreeze(Match.getOperand(0));
- SDValue RHS = DAG.getFreeze(Match.getOperand(1));
- return DAG.getSetCC(DL, ExtractVT, DAG.getBitcast(IntVT, LHS),
- DAG.getBitcast(IntVT, RHS), CC);
- }
-
- EVT VecSVT = VecVT.getScalarType();
+ EVT VecSVT = Match.getOperand(0).getValueType().getScalarType();
if (VecSVT != MVT::i8 && (VecSVT.getSizeInBits() % 8) == 0) {
NumElts *= VecSVT.getSizeInBits() / 8;
EVT CmpVT = EVT::getVectorVT(Ctx, MVT::i8, NumElts);
diff --git a/llvm/test/CodeGen/X86/pr53419.ll b/llvm/test/CodeGen/X86/pr53419.ll
index d92a7ceecec4..9455810fa2d7 100644
--- a/llvm/test/CodeGen/X86/pr53419.ll
+++ b/llvm/test/CodeGen/X86/pr53419.ll
@@ -13,32 +13,12 @@ declare i1 @llvm.vector.reduce.and.v8i1(<8 x i1>)
; FIXME: All four versions are semantically equivalent and should produce same asm as scalar version.
define i1 @intrinsic_v2i8(ptr align 1 %arg, ptr align 1 %arg1) {
-; SSE-LABEL: intrinsic_v2i8:
-; SSE: # %bb.0: # %bb
-; SSE-NEXT: movzwl (%rdi), %eax
-; SSE-NEXT: cmpw %ax, (%rsi)
-; SSE-NEXT: sete %al
-; SSE-NEXT: retq
-;
-; AVX-LABEL: intrinsic_v2i8:
-; AVX: # %bb.0: # %bb
-; AVX-NEXT: movzwl (%rdi), %eax
-; AVX-NEXT: cmpw %ax, (%rsi)
-; AVX-NEXT: sete %al
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: intrinsic_v2i8:
-; AVX512: # %bb.0: # %bb
-; AVX512-NEXT: movzwl (%rsi), %eax
-; AVX512-NEXT: vmovd %eax, %xmm0
-; AVX512-NEXT: movzwl (%rdi), %eax
-; AVX512-NEXT: vmovd %eax, %xmm1
-; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
-; AVX512-NEXT: knotw %k0, %k0
-; AVX512-NEXT: kmovd %k0, %eax
-; AVX512-NEXT: testb $3, %al
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: retq
+; X64-LABEL: intrinsic_v2i8:
+; X64: # %bb.0: # %bb
+; X64-NEXT: movzwl (%rdi), %eax
+; X64-NEXT: cmpw %ax, (%rsi)
+; X64-NEXT: sete %al
+; X64-NEXT: retq
;
; X86-LABEL: intrinsic_v2i8:
; X86: # %bb.0: # %bb
@@ -57,30 +37,12 @@ bb:
}
define i1 @intrinsic_v4i8(ptr align 1 %arg, ptr align 1 %arg1) {
-; SSE-LABEL: intrinsic_v4i8:
-; SSE: # %bb.0: # %bb
-; SSE-NEXT: movl (%rdi), %eax
-; SSE-NEXT: cmpl %eax, (%rsi)
-; SSE-NEXT: sete %al
-; SSE-NEXT: retq
-;
-; AVX-LABEL: intrinsic_v4i8:
-; AVX: # %bb.0: # %bb
-; AVX-NEXT: movl (%rdi), %eax
-; AVX-NEXT: cmpl %eax, (%rsi)
-; AVX-NEXT: sete %al
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: intrinsic_v4i8:
-; AVX512: # %bb.0: # %bb
-; AVX512-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
-; AVX512-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
-; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
-; AVX512-NEXT: knotw %k0, %k0
-; AVX512-NEXT: kmovd %k0, %eax
-; AVX512-NEXT: testb $15, %al
-; AVX512-NEXT: sete %al
-; AVX512-NEXT: retq
+; X64-LABEL: intrinsic_v4i8:
+; X64: # %bb.0: # %bb
+; X64-NEXT: movl (%rdi), %eax
+; X64-NEXT: cmpl %eax, (%rsi)
+; X64-NEXT: sete %al
+; X64-NEXT: retq
;
; X86-LABEL: intrinsic_v4i8:
; X86: # %bb.0: # %bb
@@ -99,28 +61,12 @@ bb:
}
define i1 @intrinsic_v8i8(ptr align 1 %arg, ptr align 1 %arg1) {
-; SSE-LABEL: intrinsic_v8i8:
-; SSE: # %bb.0: # %bb
-; SSE-NEXT: movq (%rdi), %rax
-; SSE-NEXT: cmpq %rax, (%rsi)
-; SSE-NEXT: sete %al
-; SSE-NEXT: retq
-;
-; AVX-LABEL: intrinsic_v8i8:
-; AVX: # %bb.0: # %bb
-; AVX-NEXT: movq (%rdi), %rax
-; AVX-NEXT: cmpq %rax, (%rsi)
-; AVX-NEXT: sete %al
-; AVX-NEXT: retq
-;
-; AVX512-LABEL: intrinsic_v8i8:
-; AVX512: # %bb.0: # %bb
-; AVX512-NEXT: vmovq {{.*#+}} xmm0 = mem[0],zero
-; AVX512-NEXT: vmovq {{.*#+}} xmm1 = mem[0],zero
-; AVX512-NEXT: vpcmpeqb %xmm1, %xmm0, %k0
-; AVX512-NEXT: kortestb %k0, %k0
-; AVX512-NEXT: setb %al
-; AVX512-NEXT: retq
+; X64-LABEL: intrinsic_v8i8:
+; X64: # %bb.0: # %bb
+; X64-NEXT: movq (%rdi), %rax
+; X64-NEXT: cmpq %rax, (%rsi)
+; X64-NEXT: sete %al
+; X64-NEXT: retq
;
; X86-LABEL: intrinsic_v8i8:
; X86: # %bb.0: # %bb
More information about the llvm-commits
mailing list