[llvm] goldsteinn/icmp to fcmp (PR #82290)
via llvm-commits
llvm-commits at lists.llvm.org
Mon Feb 19 15:59:25 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-x86
Author: None (goldsteinn)
Changes:
- **[X86] Add tests for folding `icmp` of `v8i32` -> `fcmp` of `v8f32` on AVX; NFC**
- **[X86] Try Folding `icmp` of `v8i32` -> `fcmp` of `v8f32` on AVX**
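
In scalar terms, the patch tries two strategies: bitcast the `i32` lanes straight to `f32` when both operands' bit patterns are known to stay below the NaN encodings, or convert with `sitofp` when one side is a constant that converts exactly. A minimal C++ model of one 32-bit lane (an illustrative sketch under the patch's range preconditions; the helper names are not from the patch):

```cpp
#include <bit>
#include <cstdint>

// Bitcast strategy: for bit patterns u< 0x7f800001 (non-negative and not
// NaN), f32 ordering is monotone in the bit pattern, so the integer compare
// and the reinterpreted float compare agree.
bool sgt_via_bitcast(uint32_t a, uint32_t b) {
  return std::bit_cast<float>(a) > std::bit_cast<float>(b); // == (a > b)
}

// Cvt strategy: int -> f32 conversion (sitofp / vcvtdq2ps) is exact for
// |x| < 1 << 24, so once the patch's range checks pass, the integer and
// float compares agree.
bool sgt_via_cvt(int32_t a, int32_t b) {
  return static_cast<float>(a) > static_cast<float>(b);
}
```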
---
Patch is 282.65 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/82290.diff
30 Files Affected:
- (modified) llvm/lib/Target/X86/X86ISelLowering.cpp (+104)
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll (+3-12)
- (modified) llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll (+8-11)
- (added) llvm/test/CodeGen/X86/cmpf-avx.ll (+242)
- (modified) llvm/test/CodeGen/X86/combine-testps.ll (+18-7)
- (modified) llvm/test/CodeGen/X86/masked_compressstore.ll (+17-17)
- (modified) llvm/test/CodeGen/X86/masked_expandload.ll (+24-24)
- (modified) llvm/test/CodeGen/X86/masked_gather.ll (+64-70)
- (modified) llvm/test/CodeGen/X86/masked_load.ll (+3-5)
- (modified) llvm/test/CodeGen/X86/masked_store.ll (+36-33)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc.ll (+29-48)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_ssat.ll (+29-48)
- (modified) llvm/test/CodeGen/X86/masked_store_trunc_usat.ll (+29-48)
- (modified) llvm/test/CodeGen/X86/nontemporal-loads.ll (+7-11)
- (modified) llvm/test/CodeGen/X86/pr48215.ll (+9-10)
- (modified) llvm/test/CodeGen/X86/sadd_sat_vec.ll (+18-10)
- (modified) llvm/test/CodeGen/X86/setcc-lowering.ll (+3-3)
- (modified) llvm/test/CodeGen/X86/ssub_sat_vec.ll (+33-36)
- (modified) llvm/test/CodeGen/X86/v8i1-masks.ll (+6-10)
- (modified) llvm/test/CodeGen/X86/vec_saddo.ll (+45-45)
- (modified) llvm/test/CodeGen/X86/vec_ssubo.ll (+45-45)
- (modified) llvm/test/CodeGen/X86/vec_umulo.ll (+33-38)
- (modified) llvm/test/CodeGen/X86/vector-constrained-fp-intrinsics.ll (+7-11)
- (modified) llvm/test/CodeGen/X86/vector-pcmp.ll (+9-13)
- (modified) llvm/test/CodeGen/X86/vector-popcnt-256-ult-ugt.ll (+1493-1584)
- (modified) llvm/test/CodeGen/X86/vector-reduce-fmaximum.ll (-58)
- (modified) llvm/test/CodeGen/X86/vector-reduce-or-bool.ll (+11-13)
- (modified) llvm/test/CodeGen/X86/vector-reduce-xor-bool.ll (+10-12)
- (modified) llvm/test/CodeGen/X86/vector-sext.ll (+1-4)
- (modified) llvm/test/CodeGen/X86/vector-unsigned-cmp.ll (+8-8)
``````````diff
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index d9657e1aeb8026..b438700e69d8ca 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -23299,6 +23299,110 @@ static SDValue LowerVSETCC(SDValue Op, const X86Subtarget &Subtarget,
}
}
+  // We get bad codegen for v8i32 compares on AVX targets (without AVX2), so
+  // if possible convert to a v8f32 compare.
+ if (VTOp0.getVectorElementType() == MVT::i32 && VTOp0 == MVT::v8i32 &&
+ Subtarget.hasAVX() && !Subtarget.hasAVX2()) {
+ std::optional<KnownBits> KnownOps[2];
+ // Check if an op is known to be in a certain range.
+  auto OpInRange = [&DAG, Op, &KnownOps](unsigned OpNo, bool CmpLT,
+                                         const APInt &Bound) {
+ if (!KnownOps[OpNo].has_value())
+ KnownOps[OpNo] = DAG.computeKnownBits(Op.getOperand(OpNo));
+
+ if (KnownOps[OpNo]->isUnknown())
+ return false;
+
+ std::optional<bool> Res;
+ if (CmpLT)
+ Res = KnownBits::ult(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+ else
+ Res = KnownBits::ugt(*KnownOps[OpNo], KnownBits::makeConstant(Bound));
+ return Res.has_value() && *Res;
+ };
+
+ bool OkayCvt = false;
+ bool OkayBitcast = false;
+
+    // For cvt, integers up to 1 << (significand precision) convert exactly.
+ const APInt MaxConvertableCvt = APInt(32, (1U << 24));
+    // For bitcast, up to and including the +inf representation (0x7f800000).
+ const APInt MaxConvertableBitcast = APInt(32, 0x7f800001);
+
+    // For bitcast we need both op0/op1 u< MaxConvertableBitcast.
+    // NB: It might be worth it to enable the bitcast version for unsigned
+    // AVX2 comparisons as they typically require multiple instructions to
+    // lower (they don't fit `vpcmpeq`/`vpcmpgt` well).
+ if (OpInRange(1, /*CmpLT*/ true, MaxConvertableBitcast) &&
+ OpInRange(0, /*CmpLT*/ true, MaxConvertableBitcast)) {
+ OkayBitcast = true;
+ }
+ // We want to convert icmp -> fcmp using `sitofp` iff one of the converts
+ // will be constant folded.
+ else if ((DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op1)) ||
+ DAG.isConstantValueOfAnyType(peekThroughBitcasts(Op0)))) {
+ if (isUnsignedIntSetCC(Cond)) {
+ // For cvt + unsigned compare we need both lhs/rhs >= 0 and either lhs
+ // or rhs < MaxConvertableCvt
+
+ if (OpInRange(1, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+ OpInRange(0, /*CmpLT*/ true, APInt::getSignedMinValue(32)) &&
+ (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
+ OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt)))
+ OkayCvt = true;
+ } else {
+ // For cvt + signed compare we need abs(lhs) or abs(rhs) <
+ // MaxConvertableCvt
+ if (OpInRange(1, /*CmpLT*/ true, MaxConvertableCvt) ||
+ OpInRange(1, /*CmpLT*/ false, -MaxConvertableCvt) ||
+ OpInRange(0, /*CmpLT*/ true, MaxConvertableCvt) ||
+ OpInRange(0, /*CmpLT*/ false, -MaxConvertableCvt))
+ OkayCvt = true;
+ }
+ }
+
+ if (OkayBitcast || OkayCvt) {
+ switch (Cond) {
+ default:
+ llvm_unreachable("Unexpected SETCC condition");
+        // Get the new FP condition. Note that for the unsigned conditions we
+        // have verified it's okay to convert to the signed version.
+ case ISD::SETULT:
+ case ISD::SETLT:
+ Cond = ISD::SETOLT;
+ break;
+ case ISD::SETUGT:
+ case ISD::SETGT:
+ Cond = ISD::SETOGT;
+ break;
+ case ISD::SETULE:
+ case ISD::SETLE:
+ Cond = ISD::SETOLE;
+ break;
+ case ISD::SETUGE:
+ case ISD::SETGE:
+ Cond = ISD::SETOGE;
+ break;
+ case ISD::SETEQ:
+ Cond = ISD::SETOEQ;
+ break;
+ case ISD::SETNE:
+ Cond = ISD::SETONE;
+ break;
+ }
+
+ MVT FpVT = MVT::getVectorVT(MVT::f32, VT.getVectorElementCount());
+ if (OkayBitcast) {
+ Op0 = DAG.getBitcast(FpVT, Op0);
+ Op1 = DAG.getBitcast(FpVT, Op1);
+ } else {
+ Op0 = DAG.getNode(ISD::SINT_TO_FP, dl, FpVT, Op0);
+ Op1 = DAG.getNode(ISD::SINT_TO_FP, dl, FpVT, Op1);
+ }
+ return DAG.getSetCC(dl, VT, Op0, Op1, Cond);
+ }
+ }
+
// Break 256-bit integer vector compare into smaller ones.
if (VT.is256BitVector() && !Subtarget.hasInt256())
return splitIntVSETCC(VT, Op0, Op1, Cond, DAG, dl);
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
index 6255621d870e12..d680135be56418 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-sext.ll
@@ -258,10 +258,7 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; AVX1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i8_8i32:
@@ -489,16 +486,10 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vcmpeqps %ymm0, %ymm2, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
-; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
+; AVX1-NEXT: vcmpeqps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX2-LABEL: ext_i16_16i32:
diff --git a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
index ee39b1333fff31..c7785c56972d8c 100644
--- a/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
+++ b/llvm/test/CodeGen/X86/bitcast-int-to-vector-bool-zext.ll
@@ -327,10 +327,9 @@ define <8 x i32> @ext_i8_8i32(i8 %a0) {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm1, %ymm0, %ymm0
-; AVX1-NEXT: vpcmpeqd %xmm1, %xmm0, %xmm1
-; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
+; AVX1-NEXT: vcmpeqps %ymm1, %ymm0, %ymm0
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
@@ -630,18 +629,16 @@ define <16 x i32> @ext_i16_16i32(i16 %a0) {
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm1
; AVX1-NEXT: vmovaps {{.*#+}} ymm0 = [1,2,4,8,16,32,64,128]
; AVX1-NEXT: vandps %ymm0, %ymm1, %ymm2
-; AVX1-NEXT: vpcmpeqd %xmm0, %xmm2, %xmm0
+; AVX1-NEXT: vcmpeqps %ymm0, %ymm2, %ymm0
+; AVX1-NEXT: vpsrld $31, %xmm0, %xmm2
+; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpsrld $31, %xmm0, %xmm0
-; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm2
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm2, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
-; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: vmovaps {{.*#+}} ymm2 = [256,512,1024,2048,4096,8192,16384,32768]
; AVX1-NEXT: vandps %ymm2, %ymm1, %ymm1
-; AVX1-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm2
-; AVX1-NEXT: vpsrld $31, %xmm2, %xmm2
+; AVX1-NEXT: vcmpeqps %ymm2, %ymm1, %ymm1
+; AVX1-NEXT: vpsrld $31, %xmm1, %xmm2
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm1
-; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT: vpsrld $31, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT: retq
diff --git a/llvm/test/CodeGen/X86/cmpf-avx.ll b/llvm/test/CodeGen/X86/cmpf-avx.ll
new file mode 100644
index 00000000000000..f343e53b7fab83
--- /dev/null
+++ b/llvm/test/CodeGen/X86/cmpf-avx.ll
@@ -0,0 +1,242 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=CHECK,X64
+
+define <8 x i32> @cmp_eq_bitcast(<8 x i32> %x) {
+; X86-LABEL: cmp_eq_bitcast:
+; X86: # %bb.0:
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_eq_bitcast:
+; X64: # %bb.0:
+; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: retq
+ %and = and <8 x i32> %x, <i32 7, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %cmp = icmp eq <8 x i32> %and, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_ne_sitofp(<8 x i32> %x) {
+; X86-LABEL: cmp_ne_sitofp:
+; X86: # %bb.0:
+; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X86-NEXT: vcmpneq_oqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_ne_sitofp:
+; X64: # %bb.0:
+; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X64-NEXT: vcmpneq_oqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: retq
+ %cmp = icmp ne <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_slt_fail_no_const(<8 x i32> %x, <8 x i32> %y) {
+; X86-LABEL: cmp_slt_fail_no_const:
+; X86: # %bb.0:
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vextractf128 $1, %ymm1, %xmm2
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm3
+; X86-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
+; X86-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; X86-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_slt_fail_no_const:
+; X64: # %bb.0:
+; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vextractf128 $1, %ymm1, %xmm2
+; X64-NEXT: vextractf128 $1, %ymm0, %xmm3
+; X64-NEXT: vpcmpgtd %xmm3, %xmm2, %xmm2
+; X64-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; X64-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; X64-NEXT: retq
+ %and = and <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %cmp = icmp slt <8 x i32> %and, %y
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_eq_sitofp(<8 x i32> %x) {
+; X86-LABEL: cmp_eq_sitofp:
+; X86: # %bb.0:
+; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X86-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_eq_sitofp:
+; X64: # %bb.0:
+; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X64-NEXT: vcmpeqps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: retq
+ %cmp = icmp eq <8 x i32> %x, <i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3, i32 -3>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_sgt_fail_no_bounds(<8 x i32> %x, <8 x i32> %y) {
+; CHECK-LABEL: cmp_sgt_fail_no_bounds:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm2
+; CHECK-NEXT: vextractf128 $1, %ymm1, %xmm3
+; CHECK-NEXT: vpcmpgtd %xmm2, %xmm3, %xmm2
+; CHECK-NEXT: vpcmpgtd %xmm0, %xmm1, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %cmp = icmp slt <8 x i32> %x, %y
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_sgt_bitcast(<8 x i32> %xx, <8 x i32> %yy) {
+; CHECK-LABEL: cmp_sgt_bitcast:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vbroadcastss {{.*#+}} ymm2 = [2139095040,2139095040,2139095040,2139095040,2139095040,2139095040,2139095040,2139095040]
+; CHECK-NEXT: vandps %ymm2, %ymm0, %ymm0
+; CHECK-NEXT: vandps %ymm2, %ymm1, %ymm1
+; CHECK-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
+ %y = and <8 x i32> %yy, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
+
+ %cmp = icmp sgt <8 x i32> %x, %y
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_sle_fail_out_of_bounds(<8 x i32> %xx) {
+; X86-LABEL: cmp_sle_fail_out_of_bounds:
+; X86: # %bb.0:
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X86-NEXT: vbroadcastss {{.*#+}} xmm2 = [2139095041,2139095041,2139095041,2139095041]
+; X86-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
+; X86-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
+; X86-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_sle_fail_out_of_bounds:
+; X64: # %bb.0:
+; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vextractf128 $1, %ymm0, %xmm1
+; X64-NEXT: vbroadcastss {{.*#+}} xmm2 = [2139095041,2139095041,2139095041,2139095041]
+; X64-NEXT: vpcmpgtd %xmm1, %xmm2, %xmm1
+; X64-NEXT: vpcmpgtd %xmm0, %xmm2, %xmm0
+; X64-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = and <8 x i32> %xx, <i32 2139095041, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
+ %cmp = icmp sle <8 x i32> %x, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_eq_fail_out_of_bounds(<8 x i32> %x) {
+; CHECK-LABEL: cmp_eq_fail_out_of_bounds:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [16777216,16777216,16777216,16777216]
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %cmp = icmp eq <8 x i32> %x, <i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216, i32 16777216>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_eq_fail_out_of_bounds2(<8 x i32> %x) {
+; CHECK-LABEL: cmp_eq_fail_out_of_bounds2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [4278190080,4278190080,4278190080,4278190080]
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm1, %xmm1
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %cmp = icmp eq <8 x i32> %x, <i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216, i32 -16777216>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_eq_todo(<8 x i32> %x) {
+; X86-LABEL: cmp_eq_todo:
+; X86: # %bb.0:
+; X86-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm1
+; X86-NEXT: vextractf128 $1, %ymm0, %xmm0
+; X86-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm0, %xmm0
+; X86-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_eq_todo:
+; X64: # %bb.0:
+; X64-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1
+; X64-NEXT: vextractf128 $1, %ymm0, %xmm0
+; X64-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
+; X64-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
+; X64-NEXT: retq
+ %cmp = icmp eq <8 x i32> %x, <i32 -16777215, i32 16777215, i32 16777215, i32 -16777215, i32 16777215, i32 -16777215, i32 16777215, i32 -16777215>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_ult_fail_maybe_negative(<8 x i32> %x) {
+; CHECK-LABEL: cmp_ult_fail_maybe_negative:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm1
+; CHECK-NEXT: vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
+; CHECK-NEXT: vpminud %xmm2, %xmm1, %xmm3
+; CHECK-NEXT: vpcmpeqd %xmm3, %xmm1, %xmm1
+; CHECK-NEXT: vpminud %xmm2, %xmm0, %xmm2
+; CHECK-NEXT: vpcmpeqd %xmm2, %xmm0, %xmm0
+; CHECK-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
+; CHECK-NEXT: ret{{[l|q]}}
+ %cmp = icmp ult <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_ule_bitcast(<8 x i32> %xx) {
+; X86-LABEL: cmp_ule_bitcast:
+; X86: # %bb.0:
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_ule_bitcast:
+; X64: # %bb.0:
+; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vcmpltps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: retq
+ %x = and <8 x i32> %xx, <i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040, i32 2139095040>
+ %cmp = icmp ule <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
+
+define <8 x i32> @cmp_ugt_sitofp(<8 x i32> %xx) {
+; X86-LABEL: cmp_ugt_sitofp:
+; X86: # %bb.0:
+; X86-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}, %ymm0, %ymm0
+; X86-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X86-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; X86-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; X86-NEXT: retl
+;
+; X64-LABEL: cmp_ugt_sitofp:
+; X64: # %bb.0:
+; X64-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0
+; X64-NEXT: vcvtdq2ps %ymm0, %ymm0
+; X64-NEXT: vbroadcastss {{.*#+}} ymm1 = [3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0,3.0E+0]
+; X64-NEXT: vcmpltps %ymm0, %ymm1, %ymm0
+; X64-NEXT: retq
+ %x = and <8 x i32> %xx, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
+ %cmp = icmp ugt <8 x i32> %x, <i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3, i32 3>
+ %sext = sext <8 x i1> %cmp to <8 x i32>
+ ret <8 x i32> %sext
+}
diff --git a/llvm/test/CodeGen/X86/combine-testps.ll b/llvm/test/CodeGen/X86/combine-testps.ll
index 43dddbecf51a7d..66165ce2aa53a5 100644
--- a/llvm/test/CodeGen/X86/combine-testps.ll
+++ b/llvm/test/CodeGen/X86/combine-testps.ll
@@ -171,13 +171,24 @@ define i32 @testpsz_128_signbit(<4 x float> %c, <4 x float> %d, i32 %a, i32 %b)
}
define i32 @testpsnzc_256_signbit(<8 x float> %c, <8 x float> %d, i32 %a, i32 %b) {
-; CHECK-LABEL: testpsnzc_256_signbit:
-; CHECK: # %bb.0:
-; CHECK-NEXT: movl %edi, %eax
-; CHECK-NEXT: vtestps %ymm1, %ymm0
-; CHECK-NEXT: cmovnel %esi, %eax
-; CHECK-NEXT: vzeroupper
-; CHECK-NEXT: retq
+; AVX-LABEL: testpsnzc_256_signbit:
+; AVX: # %bb.0:
+; AVX-NEXT: movl %edi, %eax
+; AVX-NEXT: vcvtdq2ps %ymm0, %ymm0
+; AVX-NEXT: vxorps %xmm2, %xmm2, %xmm2
+; AVX-NEXT: vcmpltps %ymm2, %ymm0, %ymm0
+; AVX-NEXT: vtestps %ymm1, %ymm0
+; AVX-NEXT: cmovnel %esi, %eax
+; AVX-NEXT: vzeroupper
+; AVX-NEXT: retq
+;
+; AVX2-LABEL: testpsnzc_256_signbit:
+; AVX2: # %bb.0:
+; AVX2-NEXT: movl %edi, %eax
+; AVX2-NEXT: vtestps %ymm1, %ymm0
+; AVX2-NEXT: cmovnel %esi, %eax
+; AVX2-NEXT: vzeroupper
+; AVX2-NEXT: retq
%t0 = b...
[truncated]
``````````
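
As a sanity check on the `MaxConvertableBitcast` bound above (a standalone sketch, not part of the patch): every bit pattern up to and including +inf (`0x7f800000`) interprets to an `f32` that orders exactly like the unsigned pattern, while `0x7f800001` begins the NaN encodings, for which ordered compares always return false.

```cpp
#include <bit>
#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t Inf = 0x7f800000u; // 0x7f800001 is the first NaN encoding
  // Spot-check that u32 ordering matches f32 ordering on [0, Inf].
  for (uint32_t A = 0; A + 0x10001u <= Inf; A += 0x10001u) {
    uint32_t B = Inf - A;
    bool IntLt = A < B;
    bool FpLt = std::bit_cast<float>(A) < std::bit_cast<float>(B);
    if (IntLt != FpLt)
      return std::printf("mismatch at %#x vs %#x\n", A, B), 1;
  }
  // NaN patterns break the equivalence: 1 u< 0x7f800001 as integers, but an
  // ordered float compare against a NaN is false.
  std::printf("NaN compare: %d\n",
              std::bit_cast<float>(1u) < std::bit_cast<float>(0x7f800001u));
  return 0;
}
```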
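Likewise for `MaxConvertableCvt`: `f32` has 24 significand bits, so int-to-float conversion is exact and one-to-one below `1 << 24`; at `2^24` the `f32` spacing grows to 2, so `2^24 + 1` rounds to the same float as `2^24`, which is what the `cmp_eq_fail_out_of_bounds` test above guards against. A quick demonstration (again just a sketch):

```cpp
#include <cstdio>

int main() {
  const int Bound = 1 << 24; // MaxConvertableCvt in the patch
  // Below the bound, int -> f32 is exact, so compares survive the cvt.
  std::printf("%d -> %.1f (exact)\n", Bound - 1, (double)(float)(Bound - 1));
  // At the bound, spacing becomes 2: 2^24 + 1 rounds to 2^24, so an icmp
  // eq/ne folded through the cvt would give the wrong answer.
  std::printf("(float)(2^24 + 1) == (float)2^24 -> %d\n",
              (float)(Bound + 1) == (float)Bound);
  return 0;
}
```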
https://github.com/llvm/llvm-project/pull/82290
More information about the llvm-commits mailing list