[llvm] d842b88 - [SLP] Add tests with selects that can be turned into min/max.

Florian Hahn via llvm-commits llvm-commits at lists.llvm.org
Thu Oct 22 09:26:07 PDT 2020


Author: Florian Hahn
Date: 2020-10-22T17:25:28+01:00
New Revision: d842b8868771a9b6b039144c1f7550adec245f0c

URL: https://github.com/llvm/llvm-project/commit/d842b8868771a9b6b039144c1f7550adec245f0c
DIFF: https://github.com/llvm/llvm-project/commit/d842b8868771a9b6b039144c1f7550adec245f0c.diff

LOG: [SLP] Add tests with selects that can be turned into min/max.

AArch64 does not have a flexible vector select instruction. In some
cases, however, the selects can be turned into min/max, for which there
are dedicated vector instructions on AArch64.

This patch adds some tests for such cases.

Added: 
    llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll

Modified: 
    

Removed: 
    


################################################################################
diff  --git a/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
new file mode 100644
index 000000000000..18fc74977eef
--- /dev/null
+++ b/llvm/test/Transforms/SLPVectorizer/AArch64/vectorizable-selects-min-max.ll
@@ -0,0 +1,1458 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt -slp-vectorizer -S %s | FileCheck %s
+; RUN: opt -passes='slp-vectorizer' -S %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0" ; arm64 target, matching the AArch64 test directory this file is added to
+
+define void @select_umin_8xi16(i16* %ptr, i16 %x) { ; clamps 8 adjacent i16 elements at %ptr to at most 16383 (unsigned), in place; %x is unused
+; CHECK-LABEL: @select_umin_8xi16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i16 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ult i16 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i16 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ult i16 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp ult i16 [[L_4]], 16383
+; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp ult i16 [[L_5]], 16383
+; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp ult i16 [[L_6]], 16383
+; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp ult i16 [[L_7]], 16383
+; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i16, i16* %ptr ; no explicit alignment here; the expected output prints align 2
+  %cmp.0 = icmp ult i16 %l.0, 16383 ; lane 0: unsigned l.0 < 16383
+  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383 ; keeps the smaller value, i.e. umin(l.0, 16383)
+  store i16 %s.0, i16* %ptr, align 2
+
+  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1 ; lanes 1-7 repeat the lane-0 load/icmp/select/store at consecutive element offsets
+  %l.1 = load i16, i16* %gep.1
+  %cmp.1 = icmp ult i16 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
+  store i16 %s.1, i16* %gep.1, align 2
+
+  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
+  %l.2 = load i16, i16* %gep.2
+  %cmp.2 = icmp ult i16 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
+  store i16 %s.2, i16* %gep.2, align 2
+
+  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
+  %l.3 = load i16, i16* %gep.3
+  %cmp.3 = icmp ult i16 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
+  store i16 %s.3, i16* %gep.3, align 2
+
+  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
+  %l.4 = load i16, i16* %gep.4
+  %cmp.4 = icmp ult i16 %l.4, 16383
+  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
+  store i16 %s.4, i16* %gep.4, align 2
+
+  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
+  %l.5 = load i16, i16* %gep.5
+  %cmp.5 = icmp ult i16 %l.5, 16383
+  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
+  store i16 %s.5, i16* %gep.5, align 2
+
+  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
+  %l.6 = load i16, i16* %gep.6
+  %cmp.6 = icmp ult i16 %l.6, 16383
+  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
+  store i16 %s.6, i16* %gep.6, align 2
+
+  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
+  %l.7 = load i16, i16* %gep.7
+  %cmp.7 = icmp ult i16 %l.7, 16383
+  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
+  store i16 %s.7, i16* %gep.7, align 2
+  ret void
+}
+
+define void @select_umin_4xi32(i32* %ptr, i32 %x) { ; clamps 4 adjacent i32 elements at %ptr to at most 16383 (unsigned), in place; %x is unused
+; CHECK-LABEL: @select_umin_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ult i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ult i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i32, i32* %ptr ; no explicit alignment here; the expected output prints align 4
+  %cmp.0 = icmp ult i32 %l.0, 16383 ; lane 0: unsigned l.0 < 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383 ; keeps the smaller value, i.e. umin(l.0, 16383)
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1 ; lanes 1-3 repeat the lane-0 load/icmp/select/store at consecutive element offsets
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp ult i32 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp ult i32 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp ult i32 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+define void @select_ule_ugt_mix_4xi32(i32* %ptr, i32 %x) { ; 4 adjacent i32 lanes whose compare predicate alternates per lane (ult on 0/2, ugt on 1/3); the name says ule but the compares are ult; %x is unused
+; CHECK-LABEL: @select_ule_ugt_mix_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ugt i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ult i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ugt i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i32, i32* %ptr ; no explicit alignment here; the expected output prints align 4
+  %cmp.0 = icmp ult i32 %l.0, 16383 ; lane 0: unsigned l.0 < 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383 ; keeps the smaller value, i.e. umin(l.0, 16383)
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp ugt i32 %l.1, 16383 ; lane 1: unsigned l.1 > 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383 ; keeps the larger value, i.e. umax(l.1, 16383)
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp ult i32 %l.2, 16383 ; lane 2: same ult/min form as lane 0
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp ugt i32 %l.3, 16383 ; lane 3: same ugt/max form as lane 1
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+; There is no <2 x i64> version of umin. NOTE(review): presumably this 2-lane i64 case should therefore stay scalar - confirm.
+define void @select_umin_2xi64(i64* %ptr, i64 %x) { ; clamps 2 adjacent i64 elements at %ptr to at most 16383 (unsigned), in place; %x is unused
+; CHECK-LABEL: @select_umin_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ult i64 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
+; CHECK-NEXT:    store i64 [[S_0]], i64* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ult i64 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
+; CHECK-NEXT:    store i64 [[S_1]], i64* [[GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i64, i64* %ptr ; no explicit alignment here; the expected output prints align 8
+  %cmp.0 = icmp ult i64 %l.0, 16383 ; lane 0: unsigned l.0 < 16383
+  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383 ; keeps the smaller value, i.e. umin(l.0, 16383)
+  store i64 %s.0, i64* %ptr, align 4 ; NOTE(review): align 4 on an i64 store while the load prints align 8 - confirm this is intended
+
+  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1 ; lane 1 repeats the lane-0 sequence on the next element
+  %l.1 = load i64, i64* %gep.1
+  %cmp.1 = icmp ult i64 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
+  store i64 %s.1, i64* %gep.1, align 4
+
+  ret void
+}
+
+
+define void @select_umin_ule_8xi16(i16* %ptr, i16 %x) { ; ule variant: clamps 8 adjacent i16 elements at %ptr to at most 16383 (unsigned), in place; %x is unused
+; CHECK-LABEL: @select_umin_ule_8xi16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ule i16 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ule i16 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ule i16 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ule i16 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp ule i16 [[L_4]], 16383
+; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp ule i16 [[L_5]], 16383
+; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp ule i16 [[L_6]], 16383
+; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp ule i16 [[L_7]], 16383
+; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i16, i16* %ptr ; no explicit alignment here; the expected output prints align 2
+  %cmp.0 = icmp ule i16 %l.0, 16383 ; lane 0: unsigned l.0 <= 16383
+  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383 ; on equality both arms are 16383, so this is still umin(l.0, 16383)
+  store i16 %s.0, i16* %ptr, align 2
+
+  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1 ; lanes 1-7 repeat the lane-0 load/icmp/select/store at consecutive element offsets
+  %l.1 = load i16, i16* %gep.1
+  %cmp.1 = icmp ule i16 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
+  store i16 %s.1, i16* %gep.1, align 2
+
+  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
+  %l.2 = load i16, i16* %gep.2
+  %cmp.2 = icmp ule i16 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
+  store i16 %s.2, i16* %gep.2, align 2
+
+  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
+  %l.3 = load i16, i16* %gep.3
+  %cmp.3 = icmp ule i16 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
+  store i16 %s.3, i16* %gep.3, align 2
+
+  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
+  %l.4 = load i16, i16* %gep.4
+  %cmp.4 = icmp ule i16 %l.4, 16383
+  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
+  store i16 %s.4, i16* %gep.4, align 2
+
+  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
+  %l.5 = load i16, i16* %gep.5
+  %cmp.5 = icmp ule i16 %l.5, 16383
+  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
+  store i16 %s.5, i16* %gep.5, align 2
+
+  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
+  %l.6 = load i16, i16* %gep.6
+  %cmp.6 = icmp ule i16 %l.6, 16383
+  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
+  store i16 %s.6, i16* %gep.6, align 2
+
+  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
+  %l.7 = load i16, i16* %gep.7
+  %cmp.7 = icmp ule i16 %l.7, 16383
+  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
+  store i16 %s.7, i16* %gep.7, align 2
+  ret void
+}
+
+define void @select_umin_ule_4xi32(i32* %ptr, i32 %x) { ; ule variant: clamps 4 adjacent i32 elements at %ptr to at most 16383 (unsigned), in place; %x is unused
+; CHECK-LABEL: @select_umin_ule_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ule i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ule i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ule i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ule i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i32, i32* %ptr ; no explicit alignment here; the expected output prints align 4
+  %cmp.0 = icmp ule i32 %l.0, 16383 ; lane 0: unsigned l.0 <= 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383 ; on equality both arms are 16383, so this is still umin(l.0, 16383)
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1 ; lanes 1-3 repeat the lane-0 load/icmp/select/store at consecutive element offsets
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp ule i32 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp ule i32 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp ule i32 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+; There is no <2 x i64> version of umin. NOTE(review): presumably this 2-lane i64 case should therefore stay scalar - confirm.
+define void @select_umin_ule_2xi64(i64* %ptr, i64 %x) { ; ule variant: clamps 2 adjacent i64 elements at %ptr to at most 16383 (unsigned), in place; %x is unused
+; CHECK-LABEL: @select_umin_ule_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ule i64 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
+; CHECK-NEXT:    store i64 [[S_0]], i64* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ule i64 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
+; CHECK-NEXT:    store i64 [[S_1]], i64* [[GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i64, i64* %ptr ; no explicit alignment here; the expected output prints align 8
+  %cmp.0 = icmp ule i64 %l.0, 16383 ; lane 0: unsigned l.0 <= 16383
+  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383 ; on equality both arms are 16383, so this is still umin(l.0, 16383)
+  store i64 %s.0, i64* %ptr, align 4 ; NOTE(review): align 4 on an i64 store while the load prints align 8 - confirm this is intended
+
+  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1 ; lane 1 repeats the lane-0 sequence on the next element
+  %l.1 = load i64, i64* %gep.1
+  %cmp.1 = icmp ule i64 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
+  store i64 %s.1, i64* %gep.1, align 4
+
+  ret void
+}
+
+define void @select_smin_8xi16(i16* %ptr, i16 %x) { ; signed variant: clamps 8 adjacent i16 elements at %ptr to at most 16383 (signed compare), in place; %x is unused
+; CHECK-LABEL: @select_smin_8xi16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp slt i16 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp slt i16 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp slt i16 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp slt i16 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp slt i16 [[L_4]], 16383
+; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp slt i16 [[L_5]], 16383
+; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp slt i16 [[L_6]], 16383
+; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp slt i16 [[L_7]], 16383
+; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i16, i16* %ptr ; no explicit alignment here; the expected output prints align 2
+  %cmp.0 = icmp slt i16 %l.0, 16383 ; lane 0: signed l.0 < 16383
+  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383 ; keeps the smaller value, i.e. smin(l.0, 16383)
+  store i16 %s.0, i16* %ptr, align 2
+
+  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1 ; lanes 1-7 repeat the lane-0 load/icmp/select/store at consecutive element offsets
+  %l.1 = load i16, i16* %gep.1
+  %cmp.1 = icmp slt i16 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
+  store i16 %s.1, i16* %gep.1, align 2
+
+  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
+  %l.2 = load i16, i16* %gep.2
+  %cmp.2 = icmp slt i16 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
+  store i16 %s.2, i16* %gep.2, align 2
+
+  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
+  %l.3 = load i16, i16* %gep.3
+  %cmp.3 = icmp slt i16 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
+  store i16 %s.3, i16* %gep.3, align 2
+
+  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
+  %l.4 = load i16, i16* %gep.4
+  %cmp.4 = icmp slt i16 %l.4, 16383
+  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
+  store i16 %s.4, i16* %gep.4, align 2
+
+  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
+  %l.5 = load i16, i16* %gep.5
+  %cmp.5 = icmp slt i16 %l.5, 16383
+  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
+  store i16 %s.5, i16* %gep.5, align 2
+
+  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
+  %l.6 = load i16, i16* %gep.6
+  %cmp.6 = icmp slt i16 %l.6, 16383
+  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
+  store i16 %s.6, i16* %gep.6, align 2
+
+  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
+  %l.7 = load i16, i16* %gep.7
+  %cmp.7 = icmp slt i16 %l.7, 16383
+  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
+  store i16 %s.7, i16* %gep.7, align 2
+  ret void
+}
+
+define void @select_smin_4xi32(i32* %ptr, i32 %x) { ; signed variant: clamps 4 adjacent i32 elements at %ptr to at most 16383 (signed compare), in place; %x is unused
+; CHECK-LABEL: @select_smin_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp slt i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp slt i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp slt i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp slt i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i32, i32* %ptr ; no explicit alignment here; the expected output prints align 4
+  %cmp.0 = icmp slt i32 %l.0, 16383 ; lane 0: signed l.0 < 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383 ; keeps the smaller value, i.e. smin(l.0, 16383)
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1 ; lanes 1-3 repeat the lane-0 load/icmp/select/store at consecutive element offsets
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp slt i32 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp slt i32 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp slt i32 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+; There is no <2 x i64> version of smin. NOTE(review): presumably this 2-lane i64 case should therefore stay scalar - confirm.
+define void @select_smin_2xi64(i64* %ptr, i64 %x) { ; signed variant: clamps 2 adjacent i64 elements at %ptr to at most 16383 (signed compare), in place; %x is unused
+; CHECK-LABEL: @select_smin_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp slt i64 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
+; CHECK-NEXT:    store i64 [[S_0]], i64* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp slt i64 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
+; CHECK-NEXT:    store i64 [[S_1]], i64* [[GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i64, i64* %ptr ; no explicit alignment here; the expected output prints align 8
+  %cmp.0 = icmp slt i64 %l.0, 16383 ; lane 0: signed l.0 < 16383
+  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383 ; keeps the smaller value, i.e. smin(l.0, 16383)
+  store i64 %s.0, i64* %ptr, align 4 ; NOTE(review): align 4 on an i64 store while the load prints align 8 - confirm this is intended
+
+  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1 ; lane 1 repeats the lane-0 sequence on the next element
+  %l.1 = load i64, i64* %gep.1
+  %cmp.1 = icmp slt i64 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
+  store i64 %s.1, i64* %gep.1, align 4
+
+  ret void
+}
+
+define void @select_smin_sle_8xi16(i16* %ptr, i16 %x) { ; sle variant: clamps 8 adjacent i16 elements at %ptr to at most 16383 (signed compare), in place; %x is unused
+; CHECK-LABEL: @select_smin_sle_8xi16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sle i16 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sle i16 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp sle i16 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp sle i16 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp sle i16 [[L_4]], 16383
+; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp sle i16 [[L_5]], 16383
+; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp sle i16 [[L_6]], 16383
+; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp sle i16 [[L_7]], 16383
+; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    ret void
+;
+entry: ; the autogenerated assertions above expect this scalar code to pass through unvectorized
+  %l.0 = load i16, i16* %ptr ; no explicit alignment here; the expected output prints align 2
+  %cmp.0 = icmp sle i16 %l.0, 16383 ; lane 0: signed l.0 <= 16383
+  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383 ; on equality both arms are 16383, so this is still smin(l.0, 16383)
+  store i16 %s.0, i16* %ptr, align 2
+
+  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1 ; lanes 1-7 repeat the lane-0 load/icmp/select/store at consecutive element offsets
+  %l.1 = load i16, i16* %gep.1
+  %cmp.1 = icmp sle i16 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
+  store i16 %s.1, i16* %gep.1, align 2
+
+  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
+  %l.2 = load i16, i16* %gep.2
+  %cmp.2 = icmp sle i16 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
+  store i16 %s.2, i16* %gep.2, align 2
+
+  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
+  %l.3 = load i16, i16* %gep.3
+  %cmp.3 = icmp sle i16 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
+  store i16 %s.3, i16* %gep.3, align 2
+
+  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
+  %l.4 = load i16, i16* %gep.4
+  %cmp.4 = icmp sle i16 %l.4, 16383
+  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
+  store i16 %s.4, i16* %gep.4, align 2
+
+  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
+  %l.5 = load i16, i16* %gep.5
+  %cmp.5 = icmp sle i16 %l.5, 16383
+  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
+  store i16 %s.5, i16* %gep.5, align 2
+
+  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
+  %l.6 = load i16, i16* %gep.6
+  %cmp.6 = icmp sle i16 %l.6, 16383
+  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
+  store i16 %s.6, i16* %gep.6, align 2
+
+  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
+  %l.7 = load i16, i16* %gep.7
+  %cmp.7 = icmp sle i16 %l.7, 16383
+  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
+  store i16 %s.7, i16* %gep.7, align 2
+  ret void
+}
+; smin (sle form), 4 x i32: each lane at %ptr becomes (lane sle 16383 ? lane : 16383); %x is unused; the assertions expect scalar code.
+define void @select_smin_sle_4xi32(i32* %ptr, i32 %x) {
+; CHECK-LABEL: @select_smin_sle_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sle i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sle i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp sle i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp sle i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i32, i32* %ptr
+  %cmp.0 = icmp sle i32 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp sle i32 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp sle i32 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp sle i32 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+; There is no <2 x i64> version of smin on AArch64; the assertions keep this 2 x i64 sle/select clamp to 16383 scalar. %x is unused.
+define void @select_smin_sle_2xi64(i64* %ptr, i64 %x) {
+; CHECK-LABEL: @select_smin_sle_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sle i64 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
+; CHECK-NEXT:    store i64 [[S_0]], i64* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sle i64 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
+; CHECK-NEXT:    store i64 [[S_1]], i64* [[GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i64, i64* %ptr
+  %cmp.0 = icmp sle i64 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
+  store i64 %s.0, i64* %ptr, align 4 ; NOTE(review): i64 stores use align 4 while the loads are align 8 in the assertions - confirm intended.
+
+  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
+  %l.1 = load i64, i64* %gep.1
+  %cmp.1 = icmp sle i64 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
+  store i64 %s.1, i64* %gep.1, align 4
+
+  ret void
+}
+define void @select_umax_8xi16(i16* %ptr, i16 %x) { ; umax (ugt form), 8 x i16: each lane becomes (lane ugt 16383 ? lane : 16383); %x is unused.
+; CHECK-LABEL: @select_umax_8xi16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ugt i16 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ugt i16 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ugt i16 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ugt i16 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp ugt i16 [[L_4]], 16383
+; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp ugt i16 [[L_5]], 16383
+; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp ugt i16 [[L_6]], 16383
+; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp ugt i16 [[L_7]], 16383
+; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i16, i16* %ptr
+  %cmp.0 = icmp ugt i16 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
+  store i16 %s.0, i16* %ptr, align 2
+
+  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
+  %l.1 = load i16, i16* %gep.1
+  %cmp.1 = icmp ugt i16 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
+  store i16 %s.1, i16* %gep.1, align 2
+
+  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
+  %l.2 = load i16, i16* %gep.2
+  %cmp.2 = icmp ugt i16 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
+  store i16 %s.2, i16* %gep.2, align 2
+
+  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
+  %l.3 = load i16, i16* %gep.3
+  %cmp.3 = icmp ugt i16 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
+  store i16 %s.3, i16* %gep.3, align 2
+
+  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
+  %l.4 = load i16, i16* %gep.4
+  %cmp.4 = icmp ugt i16 %l.4, 16383
+  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
+  store i16 %s.4, i16* %gep.4, align 2
+
+  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
+  %l.5 = load i16, i16* %gep.5
+  %cmp.5 = icmp ugt i16 %l.5, 16383
+  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
+  store i16 %s.5, i16* %gep.5, align 2
+
+  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
+  %l.6 = load i16, i16* %gep.6
+  %cmp.6 = icmp ugt i16 %l.6, 16383
+  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
+  store i16 %s.6, i16* %gep.6, align 2
+
+  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
+  %l.7 = load i16, i16* %gep.7
+  %cmp.7 = icmp ugt i16 %l.7, 16383
+  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
+  store i16 %s.7, i16* %gep.7, align 2
+  ret void
+}
+; umax (ugt form), 4 x i32: each lane at %ptr becomes (lane ugt 16383 ? lane : 16383); %x is unused; the assertions expect scalar code.
+define void @select_umax_4xi32(i32* %ptr, i32 %x) {
+; CHECK-LABEL: @select_umax_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ugt i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ugt i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp ugt i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp ugt i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i32, i32* %ptr
+  %cmp.0 = icmp ugt i32 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp ugt i32 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp ugt i32 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp ugt i32 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+; There is no <2 x i64> version of umax on AArch64; the assertions keep this 2 x i64 ugt/select clamp to 16383 scalar. %x is unused.
+define void @select_umax_2xi64(i64* %ptr, i64 %x) {
+; CHECK-LABEL: @select_umax_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp ugt i64 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
+; CHECK-NEXT:    store i64 [[S_0]], i64* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp ugt i64 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
+; CHECK-NEXT:    store i64 [[S_1]], i64* [[GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i64, i64* %ptr
+  %cmp.0 = icmp ugt i64 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
+  store i64 %s.0, i64* %ptr, align 4 ; NOTE(review): i64 stores use align 4 while the loads are align 8 in the assertions - confirm intended.
+
+  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
+  %l.1 = load i64, i64* %gep.1
+  %cmp.1 = icmp ugt i64 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
+  store i64 %s.1, i64* %gep.1, align 4
+
+  ret void
+}
+; umax (uge form), 8 x i16: each lane at %ptr becomes (lane uge 16383 ? lane : 16383); %x is unused; the assertions expect scalar code.
+define void @select_umax_uge_8xi16(i16* %ptr, i16 %x) {
+; CHECK-LABEL: @select_umax_uge_8xi16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp uge i16 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp uge i16 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp uge i16 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp uge i16 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp uge i16 [[L_4]], 16383
+; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp uge i16 [[L_5]], 16383
+; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp uge i16 [[L_6]], 16383
+; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp uge i16 [[L_7]], 16383
+; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i16, i16* %ptr
+  %cmp.0 = icmp uge i16 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
+  store i16 %s.0, i16* %ptr, align 2
+
+  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
+  %l.1 = load i16, i16* %gep.1
+  %cmp.1 = icmp uge i16 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
+  store i16 %s.1, i16* %gep.1, align 2
+
+  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
+  %l.2 = load i16, i16* %gep.2
+  %cmp.2 = icmp uge i16 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
+  store i16 %s.2, i16* %gep.2, align 2
+
+  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
+  %l.3 = load i16, i16* %gep.3
+  %cmp.3 = icmp uge i16 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
+  store i16 %s.3, i16* %gep.3, align 2
+
+  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
+  %l.4 = load i16, i16* %gep.4
+  %cmp.4 = icmp uge i16 %l.4, 16383
+  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
+  store i16 %s.4, i16* %gep.4, align 2
+
+  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
+  %l.5 = load i16, i16* %gep.5
+  %cmp.5 = icmp uge i16 %l.5, 16383
+  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
+  store i16 %s.5, i16* %gep.5, align 2
+
+  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
+  %l.6 = load i16, i16* %gep.6
+  %cmp.6 = icmp uge i16 %l.6, 16383
+  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
+  store i16 %s.6, i16* %gep.6, align 2
+
+  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
+  %l.7 = load i16, i16* %gep.7
+  %cmp.7 = icmp uge i16 %l.7, 16383
+  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
+  store i16 %s.7, i16* %gep.7, align 2
+  ret void
+}
+; umax (uge form), 4 x i32: each lane at %ptr becomes (lane uge 16383 ? lane : 16383); %x is unused; the assertions expect scalar code.
+define void @select_umax_uge_4xi32(i32* %ptr, i32 %x) {
+; CHECK-LABEL: @select_umax_uge_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp uge i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp uge i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp uge i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp uge i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i32, i32* %ptr
+  %cmp.0 = icmp uge i32 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp uge i32 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp uge i32 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp uge i32 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+; There is no <2 x i64> version of umax on AArch64; the assertions keep this 2 x i64 uge/select clamp to 16383 scalar. %x is unused.
+define void @select_umax_uge_2xi64(i64* %ptr, i64 %x) {
+; CHECK-LABEL: @select_umax_uge_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp uge i64 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
+; CHECK-NEXT:    store i64 [[S_0]], i64* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp uge i64 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
+; CHECK-NEXT:    store i64 [[S_1]], i64* [[GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i64, i64* %ptr
+  %cmp.0 = icmp uge i64 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
+  store i64 %s.0, i64* %ptr, align 4 ; NOTE(review): i64 stores use align 4 while the loads are align 8 in the assertions - confirm intended.
+
+  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
+  %l.1 = load i64, i64* %gep.1
+  %cmp.1 = icmp uge i64 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
+  store i64 %s.1, i64* %gep.1, align 4
+
+  ret void
+}
+; smax (sgt form), 8 x i16: each lane at %ptr becomes (lane sgt 16383 ? lane : 16383); %x is unused; the assertions expect scalar code.
+define void @select_smax_8xi16(i16* %ptr, i16 %x) {
+; CHECK-LABEL: @select_smax_8xi16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sgt i16 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i16 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp sgt i16 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp sgt i16 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp sgt i16 [[L_4]], 16383
+; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp sgt i16 [[L_5]], 16383
+; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp sgt i16 [[L_6]], 16383
+; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp sgt i16 [[L_7]], 16383
+; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i16, i16* %ptr
+  %cmp.0 = icmp sgt i16 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
+  store i16 %s.0, i16* %ptr, align 2
+
+  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
+  %l.1 = load i16, i16* %gep.1
+  %cmp.1 = icmp sgt i16 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
+  store i16 %s.1, i16* %gep.1, align 2
+
+  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
+  %l.2 = load i16, i16* %gep.2
+  %cmp.2 = icmp sgt i16 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
+  store i16 %s.2, i16* %gep.2, align 2
+
+  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
+  %l.3 = load i16, i16* %gep.3
+  %cmp.3 = icmp sgt i16 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
+  store i16 %s.3, i16* %gep.3, align 2
+
+  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
+  %l.4 = load i16, i16* %gep.4
+  %cmp.4 = icmp sgt i16 %l.4, 16383
+  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
+  store i16 %s.4, i16* %gep.4, align 2
+
+  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
+  %l.5 = load i16, i16* %gep.5
+  %cmp.5 = icmp sgt i16 %l.5, 16383
+  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
+  store i16 %s.5, i16* %gep.5, align 2
+
+  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
+  %l.6 = load i16, i16* %gep.6
+  %cmp.6 = icmp sgt i16 %l.6, 16383
+  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
+  store i16 %s.6, i16* %gep.6, align 2
+
+  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
+  %l.7 = load i16, i16* %gep.7
+  %cmp.7 = icmp sgt i16 %l.7, 16383
+  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
+  store i16 %s.7, i16* %gep.7, align 2
+  ret void
+}
+; smax (sgt form), 4 x i32: each lane at %ptr becomes (lane sgt 16383 ? lane : 16383); %x is unused; the assertions expect scalar code.
+define void @select_smax_4xi32(i32* %ptr, i32 %x) {
+; CHECK-LABEL: @select_smax_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sgt i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp sgt i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp sgt i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i32, i32* %ptr
+  %cmp.0 = icmp sgt i32 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp sgt i32 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp sgt i32 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp sgt i32 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+; There is no <2 x i64> version of smax on AArch64; the assertions keep this 2 x i64 sgt/select clamp to 16383 scalar. %x is unused.
+define void @select_smax_2xi64(i64* %ptr, i64 %x) {
+; CHECK-LABEL: @select_smax_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sgt i64 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
+; CHECK-NEXT:    store i64 [[S_0]], i64* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sgt i64 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
+; CHECK-NEXT:    store i64 [[S_1]], i64* [[GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i64, i64* %ptr
+  %cmp.0 = icmp sgt i64 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
+  store i64 %s.0, i64* %ptr, align 4 ; NOTE(review): i64 stores use align 4 while the loads are align 8 in the assertions - confirm intended.
+
+  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
+  %l.1 = load i64, i64* %gep.1
+  %cmp.1 = icmp sgt i64 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
+  store i64 %s.1, i64* %gep.1, align 4
+
+  ret void
+}
+
+; smax (sge form), 8 x i16: each lane at %ptr becomes (lane sge 16383 ? lane : 16383); %x is unused; the assertions expect scalar code.
+define void @select_smax_sge_8xi16(i16* %ptr, i16 %x) {
+; CHECK-LABEL: @select_smax_sge_8xi16(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i16, i16* [[PTR:%.*]], align 2
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sge i16 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i16 [[L_0]], i16 16383
+; CHECK-NEXT:    store i16 [[S_0]], i16* [[PTR]], align 2
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i16, i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sge i16 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i16 [[L_1]], i16 16383
+; CHECK-NEXT:    store i16 [[S_1]], i16* [[GEP_1]], align 2
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i16, i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp sge i16 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i16 [[L_2]], i16 16383
+; CHECK-NEXT:    store i16 [[S_2]], i16* [[GEP_2]], align 2
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i16, i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp sge i16 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i16 [[L_3]], i16 16383
+; CHECK-NEXT:    store i16 [[S_3]], i16* [[GEP_3]], align 2
+; CHECK-NEXT:    [[GEP_4:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 4
+; CHECK-NEXT:    [[L_4:%.*]] = load i16, i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[CMP_4:%.*]] = icmp sge i16 [[L_4]], 16383
+; CHECK-NEXT:    [[S_4:%.*]] = select i1 [[CMP_4]], i16 [[L_4]], i16 16383
+; CHECK-NEXT:    store i16 [[S_4]], i16* [[GEP_4]], align 2
+; CHECK-NEXT:    [[GEP_5:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 5
+; CHECK-NEXT:    [[L_5:%.*]] = load i16, i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[CMP_5:%.*]] = icmp sge i16 [[L_5]], 16383
+; CHECK-NEXT:    [[S_5:%.*]] = select i1 [[CMP_5]], i16 [[L_5]], i16 16383
+; CHECK-NEXT:    store i16 [[S_5]], i16* [[GEP_5]], align 2
+; CHECK-NEXT:    [[GEP_6:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 6
+; CHECK-NEXT:    [[L_6:%.*]] = load i16, i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[CMP_6:%.*]] = icmp sge i16 [[L_6]], 16383
+; CHECK-NEXT:    [[S_6:%.*]] = select i1 [[CMP_6]], i16 [[L_6]], i16 16383
+; CHECK-NEXT:    store i16 [[S_6]], i16* [[GEP_6]], align 2
+; CHECK-NEXT:    [[GEP_7:%.*]] = getelementptr inbounds i16, i16* [[PTR]], i16 7
+; CHECK-NEXT:    [[L_7:%.*]] = load i16, i16* [[GEP_7]], align 2
+; CHECK-NEXT:    [[CMP_7:%.*]] = icmp sge i16 [[L_7]], 16383
+; CHECK-NEXT:    [[S_7:%.*]] = select i1 [[CMP_7]], i16 [[L_7]], i16 16383
+; CHECK-NEXT:    store i16 [[S_7]], i16* [[GEP_7]], align 2
+; CHECK-NEXT:    ret void
+;
+entry:
+  %l.0 = load i16, i16* %ptr
+  %cmp.0 = icmp sge i16 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i16 %l.0, i16 16383
+  store i16 %s.0, i16* %ptr, align 2
+
+  %gep.1 = getelementptr inbounds i16, i16* %ptr, i16 1
+  %l.1 = load i16, i16* %gep.1
+  %cmp.1 = icmp sge i16 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i16 %l.1, i16 16383
+  store i16 %s.1, i16* %gep.1, align 2
+
+  %gep.2 = getelementptr inbounds i16, i16* %ptr, i16 2
+  %l.2 = load i16, i16* %gep.2
+  %cmp.2 = icmp sge i16 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i16 %l.2, i16 16383
+  store i16 %s.2, i16* %gep.2, align 2
+
+  %gep.3 = getelementptr inbounds i16, i16* %ptr, i16 3
+  %l.3 = load i16, i16* %gep.3
+  %cmp.3 = icmp sge i16 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i16 %l.3, i16 16383
+  store i16 %s.3, i16* %gep.3, align 2
+
+  %gep.4 = getelementptr inbounds i16, i16* %ptr, i16 4
+  %l.4 = load i16, i16* %gep.4
+  %cmp.4 = icmp sge i16 %l.4, 16383
+  %s.4 = select i1 %cmp.4, i16 %l.4, i16 16383
+  store i16 %s.4, i16* %gep.4, align 2
+
+  %gep.5 = getelementptr inbounds i16, i16* %ptr, i16 5
+  %l.5 = load i16, i16* %gep.5
+  %cmp.5 = icmp sge i16 %l.5, 16383
+  %s.5 = select i1 %cmp.5, i16 %l.5, i16 16383
+  store i16 %s.5, i16* %gep.5, align 2
+
+  %gep.6 = getelementptr inbounds i16, i16* %ptr, i16 6
+  %l.6 = load i16, i16* %gep.6
+  %cmp.6 = icmp sge i16 %l.6, 16383
+  %s.6 = select i1 %cmp.6, i16 %l.6, i16 16383
+  store i16 %s.6, i16* %gep.6, align 2
+
+  %gep.7 = getelementptr inbounds i16, i16* %ptr, i16 7
+  %l.7 = load i16, i16* %gep.7
+  %cmp.7 = icmp sge i16 %l.7, 16383
+  %s.7 = select i1 %cmp.7, i16 %l.7, i16 16383
+  store i16 %s.7, i16* %gep.7, align 2
+  ret void
+}
+
+; Signed-max pattern over four adjacent i32 elements starting at %ptr.
+; Each lane loads an element, compares it with `icmp sge` against the constant
+; 16383 and selects the loaded value when the compare is true, 16383 otherwise,
+; i.e. it stores smax(element, 16383) back to the same address.
+; The autogenerated assertions below expect the function to come out of the
+; SLP vectorizer as the same scalar instruction sequence (no vector types).
+; The loads carry no explicit alignment in the input; the expected output
+; prints them with `align 4`. The %x argument is not used by the body.
+define void @select_smax_sge_4xi32(i32* %ptr, i32 %x) {
+; CHECK-LABEL: @select_smax_sge_4xi32(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i32, i32* [[PTR:%.*]], align 4
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sge i32 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i32 [[L_0]], i32 16383
+; CHECK-NEXT:    store i32 [[S_0]], i32* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i32, i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sge i32 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i32 [[L_1]], i32 16383
+; CHECK-NEXT:    store i32 [[S_1]], i32* [[GEP_1]], align 4
+; CHECK-NEXT:    [[GEP_2:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 2
+; CHECK-NEXT:    [[L_2:%.*]] = load i32, i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[CMP_2:%.*]] = icmp sge i32 [[L_2]], 16383
+; CHECK-NEXT:    [[S_2:%.*]] = select i1 [[CMP_2]], i32 [[L_2]], i32 16383
+; CHECK-NEXT:    store i32 [[S_2]], i32* [[GEP_2]], align 4
+; CHECK-NEXT:    [[GEP_3:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i32 3
+; CHECK-NEXT:    [[L_3:%.*]] = load i32, i32* [[GEP_3]], align 4
+; CHECK-NEXT:    [[CMP_3:%.*]] = icmp sge i32 [[L_3]], 16383
+; CHECK-NEXT:    [[S_3:%.*]] = select i1 [[CMP_3]], i32 [[L_3]], i32 16383
+; CHECK-NEXT:    store i32 [[S_3]], i32* [[GEP_3]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  ; Lane 0 operates on %ptr itself; lanes 1-3 address their element through
+  ; an inbounds GEP with element offsets 1, 2 and 3.
+  %l.0 = load i32, i32* %ptr
+  %cmp.0 = icmp sge i32 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i32 %l.0, i32 16383
+  store i32 %s.0, i32* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i32, i32* %ptr, i32 1
+  %l.1 = load i32, i32* %gep.1
+  %cmp.1 = icmp sge i32 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i32 %l.1, i32 16383
+  store i32 %s.1, i32* %gep.1, align 4
+
+  %gep.2 = getelementptr inbounds i32, i32* %ptr, i32 2
+  %l.2 = load i32, i32* %gep.2
+  %cmp.2 = icmp sge i32 %l.2, 16383
+  %s.2 = select i1 %cmp.2, i32 %l.2, i32 16383
+  store i32 %s.2, i32* %gep.2, align 4
+
+  %gep.3 = getelementptr inbounds i32, i32* %ptr, i32 3
+  %l.3 = load i32, i32* %gep.3
+  %cmp.3 = icmp sge i32 %l.3, 16383
+  %s.3 = select i1 %cmp.3, i32 %l.3, i32 16383
+  store i32 %s.3, i32* %gep.3, align 4
+
+  ret void
+}
+
+; There is no <2 x i64> version of smax, so this two-element i64 variant of
+; the signed-max pattern is expected to stay scalar: the autogenerated
+; assertions below contain no vector instructions.
+; Each lane loads an i64, compares it with `icmp sge` against 16383 and stores
+; the loaded value when the compare is true, 16383 otherwise, i.e.
+; smax(element, 16383).
+; NOTE(review): the stores use `align 4` while the i64 loads are printed with
+; `align 8`; this looks carried over from the i32 variant -- confirm whether
+; the under-aligned stores are intentional.
+; The %x argument is not used by the body.
+define void @select_smax_sge_2xi64(i64* %ptr, i64 %x) {
+; CHECK-LABEL: @select_smax_sge_2xi64(
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[L_0:%.*]] = load i64, i64* [[PTR:%.*]], align 8
+; CHECK-NEXT:    [[CMP_0:%.*]] = icmp sge i64 [[L_0]], 16383
+; CHECK-NEXT:    [[S_0:%.*]] = select i1 [[CMP_0]], i64 [[L_0]], i64 16383
+; CHECK-NEXT:    store i64 [[S_0]], i64* [[PTR]], align 4
+; CHECK-NEXT:    [[GEP_1:%.*]] = getelementptr inbounds i64, i64* [[PTR]], i64 1
+; CHECK-NEXT:    [[L_1:%.*]] = load i64, i64* [[GEP_1]], align 8
+; CHECK-NEXT:    [[CMP_1:%.*]] = icmp sge i64 [[L_1]], 16383
+; CHECK-NEXT:    [[S_1:%.*]] = select i1 [[CMP_1]], i64 [[L_1]], i64 16383
+; CHECK-NEXT:    store i64 [[S_1]], i64* [[GEP_1]], align 4
+; CHECK-NEXT:    ret void
+;
+entry:
+  ; Lane 0 operates on %ptr itself; lane 1 uses an inbounds GEP at offset 1.
+  %l.0 = load i64, i64* %ptr
+  %cmp.0 = icmp sge i64 %l.0, 16383
+  %s.0 = select i1 %cmp.0, i64 %l.0, i64 16383
+  store i64 %s.0, i64* %ptr, align 4
+
+  %gep.1 = getelementptr inbounds i64, i64* %ptr, i64 1
+  %l.1 = load i64, i64* %gep.1
+  %cmp.1 = icmp sge i64 %l.1, 16383
+  %s.1 = select i1 %cmp.1, i64 %l.1, i64 16383
+  store i64 %s.1, i64* %gep.1, align 4
+
+  ret void
+}


        


More information about the llvm-commits mailing list