[llvm] a1b53db - Revert "[SLP] Consider alternatives for cost of select instructions."
Florian Hahn via llvm-commits
llvm-commits at lists.llvm.org
Fri Oct 30 14:26:24 PDT 2020
Author: Florian Hahn
Date: 2020-10-30T21:26:14Z
New Revision: a1b53db32418cb6ed6f5b2054d15a22b5aa3aeb9
URL: https://github.com/llvm/llvm-project/commit/a1b53db32418cb6ed6f5b2054d15a22b5aa3aeb9
DIFF: https://github.com/llvm/llvm-project/commit/a1b53db32418cb6ed6f5b2054d15a22b5aa3aeb9.diff
LOG: Revert "[SLP] Consider alternatives for cost of select instructions."
This reverts commit 19225704890632cd2552f41ada41600a20db1371.
This appears to cause a crash in the following example:
a, b, c;
l() {
int e = a, f = l, g, h, i, j;
float *d = c, *k = b;
for (;;)
for (; g < f; g++) {
k[h] = d[i];
k[h - 1] = d[j];
h += e << 1;
i += e;
}
}
When compiled with:
clang -cc1 -triple i386-unknown-linux-gnu -emit-obj -target-cpu pentium-m -O1 -vectorize-loops -vectorize-slp reduced.c
the following assertion fails:
llvm::Type *llvm::Type::getWithNewBitWidth(unsigned int) const: Assertion `isIntOrIntVectorTy() && "Original type expected to be a vector of integers or a scalar integer."' failed.
Added:
Modified:
llvm/include/llvm/Analysis/ValueTracking.h
llvm/lib/Analysis/ValueTracking.cpp
llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
Removed:
################################################################################
diff --git a/llvm/include/llvm/Analysis/ValueTracking.h b/llvm/include/llvm/Analysis/ValueTracking.h
index f8ca8b8015bf..d14e181f379f 100644
--- a/llvm/include/llvm/Analysis/ValueTracking.h
+++ b/llvm/include/llvm/Analysis/ValueTracking.h
@@ -728,14 +728,6 @@ constexpr unsigned MaxAnalysisRecursionDepth = 6;
/// minimum/maximum flavor.
CmpInst::Predicate getInverseMinMaxPred(SelectPatternFlavor SPF);
- /// Check if the values in \p VL are select instructions that can be converted
- /// to a min or max (vector) intrinsic. Returns the intrinsic ID, if such a
- /// conversion is possible, together with a bool indicating whether all select
- /// conditions are only used by the selects. Otherwise return
- /// Intrinsic::not_intrinsic.
- std::pair<Intrinsic::ID, bool>
- canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL);
-
/// Return true if RHS is known to be implied true by LHS. Return false if
/// RHS is known to be implied false by LHS. Otherwise, return None if no
/// implication can be made.
diff --git a/llvm/lib/Analysis/ValueTracking.cpp b/llvm/lib/Analysis/ValueTracking.cpp
index ca4bb756e7ad..05eb2fc48270 100644
--- a/llvm/lib/Analysis/ValueTracking.cpp
+++ b/llvm/lib/Analysis/ValueTracking.cpp
@@ -5991,45 +5991,6 @@ CmpInst::Predicate llvm::getInverseMinMaxPred(SelectPatternFlavor SPF) {
return getMinMaxPred(getInverseMinMaxFlavor(SPF));
}
-std::pair<Intrinsic::ID, bool>
-llvm::canConvertToMinOrMaxIntrinsic(ArrayRef<Value *> VL) {
- // Check if VL contains select instructions that can be folded into a min/max
- // vector intrinsic and return the intrinsic if it is possible.
- // TODO: Support floating point min/max.
- bool AllCmpSingleUse = true;
- SelectPatternResult SelectPattern;
- SelectPattern.Flavor = SPF_UNKNOWN;
- if (all_of(VL, [&SelectPattern, &AllCmpSingleUse](Value *I) {
- Value *LHS, *RHS;
- auto CurrentPattern = matchSelectPattern(I, LHS, RHS);
- if (!SelectPatternResult::isMinOrMax(CurrentPattern.Flavor) ||
- CurrentPattern.Flavor == SPF_FMINNUM ||
- CurrentPattern.Flavor == SPF_FMAXNUM)
- return false;
- if (SelectPattern.Flavor != SPF_UNKNOWN &&
- SelectPattern.Flavor != CurrentPattern.Flavor)
- return false;
- SelectPattern = CurrentPattern;
- AllCmpSingleUse &=
- match(I, m_Select(m_OneUse(m_Value()), m_Value(), m_Value()));
- return true;
- })) {
- switch (SelectPattern.Flavor) {
- case SPF_SMIN:
- return {Intrinsic::smin, AllCmpSingleUse};
- case SPF_UMIN:
- return {Intrinsic::umin, AllCmpSingleUse};
- case SPF_SMAX:
- return {Intrinsic::smax, AllCmpSingleUse};
- case SPF_UMAX:
- return {Intrinsic::umax, AllCmpSingleUse};
- default:
- llvm_unreachable("unexpected select pattern flavor");
- }
- }
- return {Intrinsic::not_intrinsic, false};
-}
-
/// Return true if "icmp Pred LHS RHS" is always true.
static bool isTruePredicate(CmpInst::Predicate Pred, const Value *LHS,
const Value *RHS, const DataLayout &DL,
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 576d08a0e9dd..ce546045e456 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -3549,21 +3549,6 @@ int BoUpSLP::getEntryCost(TreeEntry *E) {
int ScalarCost = VecTy->getNumElements() * ScalarEltCost;
int VecCost = TTI->getCmpSelInstrCost(E->getOpcode(), VecTy, MaskTy,
CostKind, VL0);
- // Check if it is possible and profitable to use min/max for selects in
- // VL.
- //
- auto IntrinsicAndUse = canConvertToMinOrMaxIntrinsic(VL);
- if (IntrinsicAndUse.first != Intrinsic::not_intrinsic) {
- IntrinsicCostAttributes CostAttrs(IntrinsicAndUse.first, VecTy,
- {VecTy, VecTy});
- int IntrinsicCost = TTI->getIntrinsicInstrCost(CostAttrs, CostKind);
- // If the selects are the only uses of the compares, they will be dead
- // and we can adjust the cost by removing their cost.
- if (IntrinsicAndUse.second)
- IntrinsicCost -= TTI->getCmpSelInstrCost(Instruction::ICmp, VecTy,
- MaskTy, CostKind);
- VecCost = std::min(VecCost, IntrinsicCost);
- }
return ReuseShuffleCost + VecCost - ScalarCost;
}
case Instruction::FNeg:
diff --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
index 17be1f760509..18fc74977eef 100644
--- a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -slp-vectorizer -S %s | FileCheck %s
-; RUN: opt -aa-pipeline=basic-aa -passes='slp-vectorizer' -S %s | FileCheck %s
+; RUN: opt -passes='slp-vectorizer' -S %s | FileCheck %s
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
target triple = "arm64-apple-ios5.0.0"
@@ -8,19 +8,45 @@ target triple = "arm64-apple-ios5.0.0"
define void @select_umin_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_8xi16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
+; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i16 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i16 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i16 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i16 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp ult i16 [[L_4]], 16383
+; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp ult i16 [[L_5]], 16383
+; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp ult i16 [[L_6]], 16383
+; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp ult i16 [[L_7]], 16383
+; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -76,15 +102,25 @@ entry:
define void @select_umin_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_4xi32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp ult i32 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp ult i32 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp ult i32 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ult <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp ult i32 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -199,19 +235,45 @@ entry:
define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umin_ule_8xi16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
+; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp ule i16 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i16 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i16 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp ule i16 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp ule i16 [[L_4]], 16383
+; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp ule i16 [[L_5]], 16383
+; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp ule i16 [[L_6]], 16383
+; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp ule i16 [[L_7]], 16383
+; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -267,15 +329,25 @@ entry:
define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umin_ule_4xi32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp ule i32 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp ule i32 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp ule i32 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ule <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp ule i32 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -338,19 +410,45 @@ entry:
define void @select_smin_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_8xi16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
+; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp slt i16 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i16 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i16 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp slt i16 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp slt i16 [[L_4]], 16383
+; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp slt i16 [[L_5]], 16383
+; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp slt i16 [[L_6]], 16383
+; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp slt i16 [[L_7]], 16383
+; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -406,15 +504,25 @@ entry:
define void @select_smin_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_4xi32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp slt i32 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp slt i32 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp slt i32 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp slt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp slt i32 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -477,19 +585,45 @@ entry:
define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smin_sle_8xi16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
+; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp sle i16 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i16 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i16 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp sle i16 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp sle i16 [[L_4]], 16383
+; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp sle i16 [[L_5]], 16383
+; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp sle i16 [[L_6]], 16383
+; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp sle i16 [[L_7]], 16383
+; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -545,15 +679,25 @@ entry:
define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smin_sle_4xi32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp sle i32 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sle i32 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp sle i32 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sle <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp sle i32 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -615,19 +759,45 @@ entry:
define void @select_umax_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_8xi16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
+; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i16 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i16 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i16 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i16 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp ugt i16 [[L_4]], 16383
+; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp ugt i16 [[L_5]], 16383
+; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp ugt i16 [[L_6]], 16383
+; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp ugt i16 [[L_7]], 16383
+; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -683,15 +853,25 @@ entry:
define void @select_umax_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_4xi32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp ugt i32 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp ugt i32 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp ugt i32 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp ugt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp ugt i32 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -754,19 +934,45 @@ entry:
define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_umax_uge_8xi16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
+; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp uge i16 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i16 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i16 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp uge i16 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp uge i16 [[L_4]], 16383
+; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp uge i16 [[L_5]], 16383
+; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp uge i16 [[L_6]], 16383
+; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp uge i16 [[L_7]], 16383
+; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -822,15 +1028,25 @@ entry:
define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_umax_uge_4xi32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp uge i32 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp uge i32 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp uge i32 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp uge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp uge i32 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -893,19 +1109,45 @@ entry:
define void @select_smax_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smax_8xi16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
+; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp sgt i16 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i16 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i16 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp sgt i16 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp sgt i16 [[L_4]], 16383
+; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp sgt i16 [[L_5]], 16383
+; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp sgt i16 [[L_6]], 16383
+; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp sgt i16 [[L_7]], 16383
+; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -961,15 +1203,25 @@ entry:
define void @select_smax_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smax_4xi32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp sgt i32 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sgt i32 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp sgt i32 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp sgt i32 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
@@ -1033,19 +1285,45 @@ entry:
define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) {
; CHECK-LABEL: @select_smax_sge_8xi16(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i16 1
+; CHECK-NEXT: [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp sge i16 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT: store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT: [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i16 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT: store i16 [[S_1]], i16* [[GEP_1]], align 2
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT: [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i16 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT: store i16 [[S_2]], i16* [[GEP_2]], align 2
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT: [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp sge i16 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT: store i16 [[S_3]], i16* [[GEP_3]], align 2
; CHECK-NEXT: [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT: [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT: [[CMP_4:%.*]] = icmp sge i16 [[L_4]], 16383
+; CHECK-NEXT: [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT: store i16 [[S_4]], i16* [[GEP_4]], align 2
; CHECK-NEXT: [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT: [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT: [[CMP_5:%.*]] = icmp sge i16 [[L_5]], 16383
+; CHECK-NEXT: [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT: store i16 [[S_5]], i16* [[GEP_5]], align 2
; CHECK-NEXT: [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT: [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT: [[CMP_6:%.*]] = icmp sge i16 [[L_6]], 16383
+; CHECK-NEXT: [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT: store i16 [[S_6]], i16* [[GEP_6]], align 2
; CHECK-NEXT: [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <8 x i16> [[TMP1]], <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> <i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383, i16 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i16* [[PTR]] to <8 x i16>*
-; CHECK-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* [[TMP4]], align 2
+; CHECK-NEXT: [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT: [[CMP_7:%.*]] = icmp sge i16 [[L_7]], 16383
+; CHECK-NEXT: [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT: store i16 [[S_7]], i16* [[GEP_7]], align 2
; CHECK-NEXT: ret void
;
entry:
@@ -1101,15 +1379,25 @@ entry:
define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) {
; CHECK-LABEL: @select_smax_sge_4xi32(
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i32 1
+; CHECK-NEXT: [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT: [[CMP_0:%.*]] = icmp sge i32 [[L_0]], 16383
+; CHECK-NEXT: [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT: store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT: [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT: [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT: [[CMP_1:%.*]] = icmp sge i32 [[L_1]], 16383
+; CHECK-NEXT: [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT: store i32 [[S_1]], i32* [[GEP_1]], align 4
; CHECK-NEXT: [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT: [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT: [[CMP_2:%.*]] = icmp sge i32 [[L_2]], 16383
+; CHECK-NEXT: [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT: store i32 [[S_2]], i32* [[GEP_2]], align 4
; CHECK-NEXT: [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
-; CHECK-NEXT: [[TMP0:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = icmp sge <4 x i32> [[TMP1]], <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP1]], <4 x i32> <i32 16383, i32 16383, i32 16383, i32 16383>
-; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[PTR]] to <4 x i32>*
-; CHECK-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* [[TMP4]], align 4
+; CHECK-NEXT: [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT: [[CMP_3:%.*]] = icmp sge i32 [[L_3]], 16383
+; CHECK-NEXT: [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT: store i32 [[S_3]], i32* [[GEP_3]], align 4
; CHECK-NEXT: ret void
;
entry:
More information about the llvm-commits
mailing list