[llvm] r224127 - [AVX512] Enabling MIN/MAX lowering.

Robert Khasanov rob.khasanov at gmail.com
Fri Dec 12 07:10:44 PST 2014


Author: rkhasanov
Date: Fri Dec 12 09:10:43 2014
New Revision: 224127

URL: http://llvm.org/viewvc/llvm-project?rev=224127&view=rev
Log:
[AVX512] Enabling MIN/MAX lowering.
Added lowering tests.

Modified:
    llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
    llvm/trunk/lib/Target/X86/X86InstrSSE.td
    llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
    llvm/trunk/test/CodeGen/X86/avx512bw-vec-cmp.ll
    llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll
    llvm/trunk/test/CodeGen/X86/vselect-minmax.ll

Modified: llvm/trunk/lib/Target/X86/X86ISelLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86ISelLowering.cpp?rev=224127&r1=224126&r2=224127&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86ISelLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86ISelLowering.cpp Fri Dec 12 09:10:43 2014
@@ -22606,6 +22606,21 @@ matchIntegerMINMAX(SDValue Cond, EVT VT,
   bool NeedSplit = false;
   switch (VT.getSimpleVT().SimpleTy) {
   default: return std::make_pair(0, false);
+  case MVT::v4i64:
+  case MVT::v2i64:
+    if (!Subtarget->hasVLX())
+      return std::make_pair(0, false);
+    break;
+  case MVT::v64i8:
+  case MVT::v32i16:
+    if (!Subtarget->hasBWI())
+      return std::make_pair(0, false);
+    break;
+  case MVT::v16i32:
+  case MVT::v8i64:
+    if (!Subtarget->hasAVX512())
+      return std::make_pair(0, false);
+    break;
   case MVT::v32i8:
   case MVT::v16i16:
   case MVT::v8i32:

Modified: llvm/trunk/lib/Target/X86/X86InstrSSE.td
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86InstrSSE.td?rev=224127&r1=224126&r2=224127&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86InstrSSE.td (original)
+++ llvm/trunk/lib/Target/X86/X86InstrSSE.td Fri Dec 12 09:10:43 2014
@@ -2852,7 +2852,7 @@ multiclass PDI_binop_rm<bits<8> opc, str
 multiclass PDI_binop_all<bits<8> opc, string OpcodeStr, SDNode Opcode,
                          ValueType OpVT128, ValueType OpVT256,
                          OpndItins itins, bit IsCommutable = 0> {
-let Predicates = [HasAVX] in
+let Predicates = [HasAVX, NoVLX] in
   defm V#NAME : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode, OpVT128,
                     VR128, loadv2i64, i128mem, itins, IsCommutable, 0>, VEX_4V;
 
@@ -2860,7 +2860,7 @@ let Constraints = "$src1 = $dst" in
   defm NAME : PDI_binop_rm<opc, OpcodeStr, Opcode, OpVT128, VR128,
                            memopv2i64, i128mem, itins, IsCommutable, 1>;
 
-let Predicates = [HasAVX2] in
+let Predicates = [HasAVX2, NoVLX] in
   defm V#NAME#Y : PDI_binop_rm<opc, !strconcat("v", OpcodeStr), Opcode,
                                OpVT256, VR256, loadv4i64, i256mem, itins,
                                IsCommutable, 0>, VEX_4V, VEX_L;
@@ -7100,7 +7100,7 @@ multiclass SS48I_binop_rm2<bits<8> opc,
        Sched<[itins.Sched.Folded, ReadAfterLd]>;
 }
 
-let Predicates = [HasAVX] in {
+let Predicates = [HasAVX, NoVLX] in {
   let isCommutable = 0 in
   defm VPMINSB   : SS48I_binop_rm<0x38, "vpminsb", X86smin, v16i8, VR128,
                                   loadv2i64, i128mem, 0, SSE_INTALU_ITINS_P>,
@@ -7131,7 +7131,7 @@ let Predicates = [HasAVX] in {
                                    SSE_INTMUL_ITINS_P, 1, 0>, VEX_4V;
 }
 
-let Predicates = [HasAVX2] in {
+let Predicates = [HasAVX2, NoVLX] in {
   let isCommutable = 0 in
   defm VPMINSBY  : SS48I_binop_rm<0x38, "vpminsb", X86smin, v32i8, VR256,
                                   loadv4i64, i256mem, 0, SSE_INTALU_ITINS_P>,

Modified: llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll?rev=224127&r1=224126&r2=224127&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512-vec-cmp.ll Fri Dec 12 09:10:43 2014
@@ -37,15 +37,15 @@ define <16 x i32> @test3(<16 x i32> %x,
   ret <16 x i32> %max
 }
 
-define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y) nounwind {
+define <16 x i32> @test4_unsigned(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
 ; CHECK-LABEL: test4_unsigned:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k1
-; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %mask = icmp uge <16 x i32> %x, %y
-  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
+  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
   ret <16 x i32> %max
 }
 
@@ -61,15 +61,15 @@ define <8 x i64> @test5(<8 x i64> %x, <8
   ret <8 x i64> %max
 }
 
-define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y) nounwind {
+define <8 x i64> @test6_unsigned(<8 x i64> %x, <8 x i64> %y, <8 x i64> %x1) nounwind {
 ; CHECK-LABEL: test6_unsigned:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1
-; CHECK-NEXT:    vmovdqa64 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqa64 %zmm2, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %mask = icmp ugt <8 x i64> %x, %y
-  %max = select <8 x i1> %mask, <8 x i64> %x, <8 x i64> %y
+  %max = select <8 x i1> %mask, <8 x i64> %x1, <8 x i64> %y
   ret <8 x i64> %max
 }
 
@@ -196,15 +196,15 @@ define <8 x i64> @test15(<8 x i64>%a, <8
   ret <8 x i64>%res
 }
 
-define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y) nounwind {
+define <16 x i32> @test16(<16 x i32> %x, <16 x i32> %y, <16 x i32> %x1) nounwind {
 ; CHECK-LABEL: test16:
 ; CHECK:       ## BB#0:
 ; CHECK-NEXT:    vpcmpled %zmm0, %zmm1, %k1
-; CHECK-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1}
+; CHECK-NEXT:    vmovdqa32 %zmm2, %zmm1 {%k1}
 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
 ; CHECK-NEXT:    retq
   %mask = icmp sge <16 x i32> %x, %y
-  %max = select <16 x i1> %mask, <16 x i32> %x, <16 x i32> %y
+  %max = select <16 x i1> %mask, <16 x i32> %x1, <16 x i32> %y
   ret <16 x i32> %max
 }
 

Modified: llvm/trunk/test/CodeGen/X86/avx512bw-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512bw-vec-cmp.ll?rev=224127&r1=224126&r2=224127&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512bw-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512bw-vec-cmp.ll Fri Dec 12 09:10:43 2014
@@ -14,9 +14,9 @@ define <64 x i8> @test1(<64 x i8> %x, <6
 ; CHECK: vpcmpgtb {{.*%k[0-7]}}
 ; CHECK: vmovdqu8 {{.*}}%k1
 ; CHECK: ret
-define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y) nounwind {
+define <64 x i8> @test2(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
   %mask = icmp sgt <64 x i8> %x, %y
-  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
+  %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
   ret <64 x i8> %max
 }
 
@@ -34,9 +34,9 @@ define <32 x i16> @test3(<32 x i16> %x,
 ; CHECK: vpcmpnleub {{.*%k[0-7]}}
 ; CHECK: vmovdqu8 {{.*}}%k1
 ; CHECK: ret
-define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y) nounwind {
+define <64 x i8> @test4(<64 x i8> %x, <64 x i8> %y, <64 x i8> %x1) nounwind {
   %mask = icmp ugt <64 x i8> %x, %y
-  %max = select <64 x i1> %mask, <64 x i8> %x, <64 x i8> %y
+  %max = select <64 x i1> %mask, <64 x i8> %x1, <64 x i8> %y
   ret <64 x i8> %max
 }
 

Modified: llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll?rev=224127&r1=224126&r2=224127&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll (original)
+++ llvm/trunk/test/CodeGen/X86/avx512vl-vec-cmp.ll Fri Dec 12 09:10:43 2014
@@ -14,9 +14,9 @@ define <4 x i64> @test256_1(<4 x i64> %x
 ; CHECK: vpcmpgtq {{.*%k[0-7]}}
 ; CHECK: vmovdqa64 {{.*}}%k1
 ; CHECK: ret
-define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y) nounwind {
+define <4 x i64> @test256_2(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind {
   %mask = icmp sgt <4 x i64> %x, %y
-  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
+  %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
   ret <4 x i64> %max
 }
 
@@ -34,9 +34,9 @@ define <8 x i32> @test256_3(<8 x i32> %x
 ; CHECK: vpcmpnleuq {{.*%k[0-7]}}
 ; CHECK: vmovdqa64 {{.*}}%k1
 ; CHECK: ret
-define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y) nounwind {
+define <4 x i64> @test256_4(<4 x i64> %x, <4 x i64> %y, <4 x i64> %x1) nounwind {
   %mask = icmp ugt <4 x i64> %x, %y
-  %max = select <4 x i1> %mask, <4 x i64> %x, <4 x i64> %y
+  %max = select <4 x i1> %mask, <4 x i64> %x1, <4 x i64> %y
   ret <4 x i64> %max
 }
 
@@ -204,9 +204,9 @@ define <2 x i64> @test128_1(<2 x i64> %x
 ; CHECK: vpcmpgtq {{.*%k[0-7]}}
 ; CHECK: vmovdqa64 {{.*}}%k1
 ; CHECK: ret
-define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y) nounwind {
+define <2 x i64> @test128_2(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind {
   %mask = icmp sgt <2 x i64> %x, %y
-  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
+  %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
   ret <2 x i64> %max
 }
 
@@ -224,9 +224,9 @@ define <4 x i32> @test128_3(<4 x i32> %x
 ; CHECK: vpcmpnleuq {{.*%k[0-7]}}
 ; CHECK: vmovdqa64 {{.*}}%k1
 ; CHECK: ret
-define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y) nounwind {
+define <2 x i64> @test128_4(<2 x i64> %x, <2 x i64> %y, <2 x i64> %x1) nounwind {
   %mask = icmp ugt <2 x i64> %x, %y
-  %max = select <2 x i1> %mask, <2 x i64> %x, <2 x i64> %y
+  %max = select <2 x i1> %mask, <2 x i64> %x1, <2 x i64> %y
   ret <2 x i64> %max
 }
 

Modified: llvm/trunk/test/CodeGen/X86/vselect-minmax.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/vselect-minmax.ll?rev=224127&r1=224126&r2=224127&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/vselect-minmax.ll (original)
+++ llvm/trunk/test/CodeGen/X86/vselect-minmax.ll Fri Dec 12 09:10:43 2014
@@ -2,6 +2,8 @@
 ; RUN: llc -march=x86-64 -mcpu=corei7 < %s | FileCheck %s -check-prefix=SSE4
 ; RUN: llc -march=x86-64 -mcpu=corei7-avx < %s | FileCheck %s -check-prefix=AVX1
 ; RUN: llc -march=x86-64 -mcpu=core-avx2 -mattr=+avx2 < %s | FileCheck %s -check-prefix=AVX2
+; RUN: llc -march=x86-64 -mcpu=knl < %s | FileCheck %s  -check-prefix=AVX2 -check-prefix=AVX512F
+; RUN: llc -march=x86-64 -mcpu=skx < %s | FileCheck %s  -check-prefix=AVX512BW -check-prefix=AVX512VL -check-prefix=AVX512F
 
 define void @test1(i8* nocapture %a, i8* nocapture %b) nounwind {
 vector.ph:
@@ -33,6 +35,9 @@ for.end:
 
 ; AVX2-LABEL: test1:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test1:
+; AVX512VL: vpminsb
 }
 
 define void @test2(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -65,6 +70,9 @@ for.end:
 
 ; AVX2-LABEL: test2:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test2:
+; AVX512VL: vpminsb
 }
 
 define void @test3(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -97,6 +105,9 @@ for.end:
 
 ; AVX2-LABEL: test3:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test3:
+; AVX512VL: vpmaxsb
 }
 
 define void @test4(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -129,6 +140,9 @@ for.end:
 
 ; AVX2-LABEL: test4:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test4:
+; AVX512VL: vpmaxsb
 }
 
 define void @test5(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -161,6 +175,9 @@ for.end:
 
 ; AVX2-LABEL: test5:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test5:
+; AVX512VL: vpminub 
 }
 
 define void @test6(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -193,6 +210,9 @@ for.end:
 
 ; AVX2-LABEL: test6:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test6:
+; AVX512VL: vpminub
 }
 
 define void @test7(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -225,6 +245,9 @@ for.end:
 
 ; AVX2-LABEL: test7:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test7:
+; AVX512VL: vpmaxub
 }
 
 define void @test8(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -257,6 +280,9 @@ for.end:
 
 ; AVX2-LABEL: test8:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test8:
+; AVX512VL: vpmaxub
 }
 
 define void @test9(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -289,6 +315,9 @@ for.end:
 
 ; AVX2-LABEL: test9:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test9:
+; AVX512VL: vpminsw 
 }
 
 define void @test10(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -321,6 +350,9 @@ for.end:
 
 ; AVX2-LABEL: test10:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test10:
+; AVX512VL: vpminsw
 }
 
 define void @test11(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -353,6 +385,9 @@ for.end:
 
 ; AVX2-LABEL: test11:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test11:
+; AVX512VL: vpmaxsw
 }
 
 define void @test12(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -385,6 +420,9 @@ for.end:
 
 ; AVX2-LABEL: test12:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test12:
+; AVX512VL: vpmaxsw
 }
 
 define void @test13(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -417,6 +455,9 @@ for.end:
 
 ; AVX2-LABEL: test13:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test13:
+; AVX512VL: vpminuw
 }
 
 define void @test14(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -449,6 +490,9 @@ for.end:
 
 ; AVX2-LABEL: test14:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test14:
+; AVX512VL: vpminuw 
 }
 
 define void @test15(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -481,6 +525,9 @@ for.end:
 
 ; AVX2-LABEL: test15:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test15:
+; AVX512VL: vpmaxuw
 }
 
 define void @test16(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -513,6 +560,9 @@ for.end:
 
 ; AVX2-LABEL: test16:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test16:
+; AVX512VL: vpmaxuw
 }
 
 define void @test17(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -545,6 +595,9 @@ for.end:
 
 ; AVX2-LABEL: test17:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test17:
+; AVX512VL: vpminsd
 }
 
 define void @test18(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -577,6 +630,9 @@ for.end:
 
 ; AVX2-LABEL: test18:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test18:
+; AVX512VL: vpminsd
 }
 
 define void @test19(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -609,6 +665,9 @@ for.end:
 
 ; AVX2-LABEL: test19:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test19:
+; AVX512VL: vpmaxsd
 }
 
 define void @test20(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -641,6 +700,9 @@ for.end:
 
 ; AVX2-LABEL: test20:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test20:
+; AVX512VL: vpmaxsd
 }
 
 define void @test21(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -673,6 +735,9 @@ for.end:
 
 ; AVX2-LABEL: test21:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test21:
+; AVX512VL: vpminud
 }
 
 define void @test22(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -705,6 +770,9 @@ for.end:
 
 ; AVX2-LABEL: test22:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test22:
+; AVX512VL: vpminud
 }
 
 define void @test23(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -737,6 +805,9 @@ for.end:
 
 ; AVX2-LABEL: test23:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test23:
+; AVX512VL: vpmaxud
 }
 
 define void @test24(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -769,6 +840,9 @@ for.end:
 
 ; AVX2-LABEL: test24:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test24:
+; AVX512VL: vpmaxud
 }
 
 define void @test25(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -795,6 +869,9 @@ for.end:
 
 ; AVX2-LABEL: test25:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test25:
+; AVX512VL: vpminsb
 }
 
 define void @test26(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -821,6 +898,9 @@ for.end:
 
 ; AVX2-LABEL: test26:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test26:
+; AVX512VL: vpminsb
 }
 
 define void @test27(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -847,6 +927,9 @@ for.end:
 
 ; AVX2-LABEL: test27:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test27:
+; AVX512VL: vpmaxsb
 }
 
 define void @test28(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -873,6 +956,9 @@ for.end:
 
 ; AVX2-LABEL: test28:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test28:
+; AVX512VL: vpmaxsb
 }
 
 define void @test29(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -899,6 +985,9 @@ for.end:
 
 ; AVX2-LABEL: test29:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test29:
+; AVX512VL: vpminub
 }
 
 define void @test30(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -925,6 +1014,9 @@ for.end:
 
 ; AVX2-LABEL: test30:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test30:
+; AVX512VL: vpminub
 }
 
 define void @test31(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -951,6 +1043,9 @@ for.end:
 
 ; AVX2-LABEL: test31:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test31:
+; AVX512VL: vpmaxub
 }
 
 define void @test32(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -977,6 +1072,9 @@ for.end:
 
 ; AVX2-LABEL: test32:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test32:
+; AVX512VL: vpmaxub
 }
 
 define void @test33(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1003,6 +1101,9 @@ for.end:
 
 ; AVX2-LABEL: test33:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test33:
+; AVX512VL: vpminsw 
 }
 
 define void @test34(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1029,6 +1130,9 @@ for.end:
 
 ; AVX2-LABEL: test34:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test34:
+; AVX512VL: vpminsw
 }
 
 define void @test35(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1055,6 +1159,9 @@ for.end:
 
 ; AVX2-LABEL: test35:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test35:
+; AVX512VL: vpmaxsw
 }
 
 define void @test36(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1081,6 +1188,9 @@ for.end:
 
 ; AVX2-LABEL: test36:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test36:
+; AVX512VL: vpmaxsw
 }
 
 define void @test37(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1107,6 +1217,9 @@ for.end:
 
 ; AVX2-LABEL: test37:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test37:
+; AVX512VL: vpminuw
 }
 
 define void @test38(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1133,6 +1246,9 @@ for.end:
 
 ; AVX2-LABEL: test38:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test38:
+; AVX512VL: vpminuw
 }
 
 define void @test39(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1159,6 +1275,9 @@ for.end:
 
 ; AVX2-LABEL: test39:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test39:
+; AVX512VL: vpmaxuw
 }
 
 define void @test40(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1185,6 +1304,9 @@ for.end:
 
 ; AVX2-LABEL: test40:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test40:
+; AVX512VL: vpmaxuw
 }
 
 define void @test41(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1211,6 +1333,9 @@ for.end:
 
 ; AVX2-LABEL: test41:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test41:
+; AVX512VL: vpminsd
 }
 
 define void @test42(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1237,6 +1362,9 @@ for.end:
 
 ; AVX2-LABEL: test42:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test42:
+; AVX512VL: vpminsd
 }
 
 define void @test43(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1263,6 +1391,9 @@ for.end:
 
 ; AVX2-LABEL: test43:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test43:
+; AVX512VL: vpmaxsd
 }
 
 define void @test44(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1289,6 +1420,9 @@ for.end:
 
 ; AVX2-LABEL: test44:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test44:
+; AVX512VL: vpmaxsd
 }
 
 define void @test45(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1315,6 +1449,9 @@ for.end:
 
 ; AVX2-LABEL: test45:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test45:
+; AVX512VL: vpminud
 }
 
 define void @test46(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1341,6 +1478,9 @@ for.end:
 
 ; AVX2-LABEL: test46:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test46:
+; AVX512VL: vpminud
 }
 
 define void @test47(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1367,6 +1507,9 @@ for.end:
 
 ; AVX2-LABEL: test47:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test47:
+; AVX512VL: vpmaxud
 }
 
 define void @test48(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1393,6 +1536,9 @@ for.end:
 
 ; AVX2-LABEL: test48:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test48:
+; AVX512VL: vpmaxud
 }
 
 define void @test49(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1425,6 +1571,9 @@ for.end:
 
 ; AVX2-LABEL: test49:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test49:
+; AVX512VL: vpmaxsb
 }
 
 define void @test50(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1457,6 +1606,9 @@ for.end:
 
 ; AVX2-LABEL: test50:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test50:
+; AVX512VL: vpmaxsb
 }
 
 define void @test51(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1489,6 +1641,9 @@ for.end:
 
 ; AVX2-LABEL: test51:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test51:
+; AVX512VL: vpminsb
 }
 
 define void @test52(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1521,6 +1676,9 @@ for.end:
 
 ; AVX2-LABEL: test52:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test52:
+; AVX512VL: vpminsb
 }
 
 define void @test53(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1553,6 +1711,9 @@ for.end:
 
 ; AVX2-LABEL: test53:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test53:
+; AVX512VL: vpmaxub
 }
 
 define void @test54(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1585,6 +1746,9 @@ for.end:
 
 ; AVX2-LABEL: test54:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test54:
+; AVX512VL: vpmaxub
 }
 
 define void @test55(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1617,6 +1781,9 @@ for.end:
 
 ; AVX2-LABEL: test55:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test55:
+; AVX512VL: vpminub
 }
 
 define void @test56(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -1649,6 +1816,9 @@ for.end:
 
 ; AVX2-LABEL: test56:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test56:
+; AVX512VL: vpminub
 }
 
 define void @test57(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1681,6 +1851,9 @@ for.end:
 
 ; AVX2-LABEL: test57:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test57:
+; AVX512VL: vpmaxsw
 }
 
 define void @test58(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1713,6 +1886,9 @@ for.end:
 
 ; AVX2-LABEL: test58:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test58:
+; AVX512VL: vpmaxsw
 }
 
 define void @test59(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1745,6 +1921,9 @@ for.end:
 
 ; AVX2-LABEL: test59:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test59:
+; AVX512VL: vpminsw
 }
 
 define void @test60(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1777,6 +1956,9 @@ for.end:
 
 ; AVX2-LABEL: test60:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test60:
+; AVX512VL: vpminsw
 }
 
 define void @test61(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1809,6 +1991,9 @@ for.end:
 
 ; AVX2-LABEL: test61:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test61:
+; AVX512VL: vpmaxuw
 }
 
 define void @test62(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1841,6 +2026,9 @@ for.end:
 
 ; AVX2-LABEL: test62:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test62:
+; AVX512VL: vpmaxuw
 }
 
 define void @test63(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1873,6 +2061,9 @@ for.end:
 
 ; AVX2-LABEL: test63:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test63:
+; AVX512VL: vpminuw
 }
 
 define void @test64(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -1905,6 +2096,9 @@ for.end:
 
 ; AVX2-LABEL: test64:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test64:
+; AVX512VL: vpminuw
 }
 
 define void @test65(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1937,6 +2131,9 @@ for.end:
 
 ; AVX2-LABEL: test65:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test65:
+; AVX512VL: vpmaxsd
 }
 
 define void @test66(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -1969,6 +2166,9 @@ for.end:
 
 ; AVX2-LABEL: test66:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test66:
+; AVX512VL: vpmaxsd
 }
 
 define void @test67(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2001,6 +2201,9 @@ for.end:
 
 ; AVX2-LABEL: test67:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test67:
+; AVX512VL: vpminsd
 }
 
 define void @test68(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2033,6 +2236,9 @@ for.end:
 
 ; AVX2-LABEL: test68:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test68:
+; AVX512VL: vpminsd
 }
 
 define void @test69(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2065,6 +2271,9 @@ for.end:
 
 ; AVX2-LABEL: test69:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test69:
+; AVX512VL: vpmaxud
 }
 
 define void @test70(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2097,6 +2306,9 @@ for.end:
 
 ; AVX2-LABEL: test70:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test70:
+; AVX512VL: vpmaxud
 }
 
 define void @test71(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2129,6 +2341,9 @@ for.end:
 
 ; AVX2-LABEL: test71:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test71:
+; AVX512VL: vpminud
 }
 
 define void @test72(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2161,6 +2376,9 @@ for.end:
 
 ; AVX2-LABEL: test72:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test72:
+; AVX512VL: vpminud
 }
 
 define void @test73(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2187,6 +2405,9 @@ for.end:
 
 ; AVX2-LABEL: test73:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test73:
+; AVX512VL: vpmaxsb
 }
 
 define void @test74(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2213,6 +2434,9 @@ for.end:
 
 ; AVX2-LABEL: test74:
 ; AVX2: vpmaxsb
+
+; AVX512VL-LABEL: test74:
+; AVX512VL: vpmaxsb 
 }
 
 define void @test75(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2239,6 +2463,9 @@ for.end:
 
 ; AVX2-LABEL: test75:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test75:
+; AVX512VL: vpminsb
 }
 
 define void @test76(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2265,6 +2492,9 @@ for.end:
 
 ; AVX2-LABEL: test76:
 ; AVX2: vpminsb
+
+; AVX512VL-LABEL: test76:
+; AVX512VL: vpminsb
 }
 
 define void @test77(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2291,6 +2521,9 @@ for.end:
 
 ; AVX2-LABEL: test77:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test77:
+; AVX512VL: vpmaxub
 }
 
 define void @test78(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2317,6 +2550,9 @@ for.end:
 
 ; AVX2-LABEL: test78:
 ; AVX2: vpmaxub
+
+; AVX512VL-LABEL: test78:
+; AVX512VL: vpmaxub
 }
 
 define void @test79(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2343,6 +2579,9 @@ for.end:
 
 ; AVX2-LABEL: test79:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test79:
+; AVX512VL: vpminub
 }
 
 define void @test80(i8* nocapture %a, i8* nocapture %b) nounwind {
@@ -2369,6 +2608,9 @@ for.end:
 
 ; AVX2-LABEL: test80:
 ; AVX2: vpminub
+
+; AVX512VL-LABEL: test80:
+; AVX512VL: vpminub
 }
 
 define void @test81(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2395,6 +2637,9 @@ for.end:
 
 ; AVX2-LABEL: test81:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test81:
+; AVX512VL: vpmaxsw
 }
 
 define void @test82(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2421,6 +2666,9 @@ for.end:
 
 ; AVX2-LABEL: test82:
 ; AVX2: vpmaxsw
+
+; AVX512VL-LABEL: test82:
+; AVX512VL: vpmaxsw
 }
 
 define void @test83(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2447,6 +2695,9 @@ for.end:
 
 ; AVX2-LABEL: test83:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test83:
+; AVX512VL: vpminsw
 }
 
 define void @test84(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2473,6 +2724,9 @@ for.end:
 
 ; AVX2-LABEL: test84:
 ; AVX2: vpminsw
+
+; AVX512VL-LABEL: test84:
+; AVX512VL: vpminsw
 }
 
 define void @test85(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2499,6 +2753,9 @@ for.end:
 
 ; AVX2-LABEL: test85:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test85:
+; AVX512VL: vpmaxuw
 }
 
 define void @test86(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2525,6 +2782,9 @@ for.end:
 
 ; AVX2-LABEL: test86:
 ; AVX2: vpmaxuw
+
+; AVX512VL-LABEL: test86:
+; AVX512VL: vpmaxuw
 }
 
 define void @test87(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2551,6 +2811,9 @@ for.end:
 
 ; AVX2-LABEL: test87:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test87:
+; AVX512VL: vpminuw
 }
 
 define void @test88(i16* nocapture %a, i16* nocapture %b) nounwind {
@@ -2577,6 +2840,9 @@ for.end:
 
 ; AVX2-LABEL: test88:
 ; AVX2: vpminuw
+
+; AVX512VL-LABEL: test88:
+; AVX512VL: vpminuw
 }
 
 define void @test89(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2603,6 +2869,9 @@ for.end:
 
 ; AVX2-LABEL: test89:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test89:
+; AVX512VL: vpmaxsd
 }
 
 define void @test90(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2629,6 +2898,9 @@ for.end:
 
 ; AVX2-LABEL: test90:
 ; AVX2: vpmaxsd
+
+; AVX512VL-LABEL: test90:
+; AVX512VL: vpmaxsd
 }
 
 define void @test91(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2655,6 +2927,9 @@ for.end:
 
 ; AVX2-LABEL: test91:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test91:
+; AVX512VL: vpminsd
 }
 
 define void @test92(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2681,6 +2956,9 @@ for.end:
 
 ; AVX2-LABEL: test92:
 ; AVX2: vpminsd
+
+; AVX512VL-LABEL: test92:
+; AVX512VL: vpminsd
 }
 
 define void @test93(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2707,6 +2985,9 @@ for.end:
 
 ; AVX2-LABEL: test93:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test93:
+; AVX512VL: vpmaxud
 }
 
 define void @test94(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2733,6 +3014,9 @@ for.end:
 
 ; AVX2-LABEL: test94:
 ; AVX2: vpmaxud
+
+; AVX512VL-LABEL: test94:
+; AVX512VL: vpmaxud
 }
 
 define void @test95(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2759,6 +3043,9 @@ for.end:
 
 ; AVX2-LABEL: test95:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test95:
+; AVX512VL: vpminud
 }
 
 define void @test96(i32* nocapture %a, i32* nocapture %b) nounwind {
@@ -2785,4 +3072,2507 @@ for.end:
 
 ; AVX2-LABEL: test96:
 ; AVX2: vpminud
+
+; AVX512VL-LABEL: test96:
+; AVX512VL: vpminud
+}
+
+; ----------------------------
+
+define void @test97(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp slt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test97:
+; AVX512BW: vpminsb {{.*}}
+}
+
+define void @test98(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sle <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test98:
+; AVX512BW: vpminsb {{.*}}
+}
+
+define void @test99(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sgt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test99:
+; AVX512BW: vpmaxsb {{.*}}
+}
+
+define void @test100(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sge <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test100:
+; AVX512BW: vpmaxsb {{.*}}
+}
+
+define void @test101(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ult <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test101:
+; AVX512BW: vpminub {{.*}}
+}
+
+define void @test102(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ule <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test102:
+; AVX512BW: vpminub {{.*}}
+}
+
+define void @test103(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ugt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test103:
+; AVX512BW: vpmaxub {{.*}}
+}
+
+define void @test104(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp uge <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.a, <64 x i8> %load.b
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test104:
+; AVX512BW: vpmaxub {{.*}}
+}
+
+define void @test105(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp slt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test105:
+; AVX512BW: vpminsw {{.*}}
+}
+
+define void @test106(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sle <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test106:
+; AVX512BW: vpminsw {{.*}}
+}
+
+define void @test107(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sgt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test107:
+; AVX512BW: vpmaxsw {{.*}}
+}
+
+define void @test108(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sge <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test108:
+; AVX512BW: vpmaxsw {{.*}}
+}
+
+define void @test109(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ult <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test109:
+; AVX512BW: vpminuw {{.*}}
+}
+
+define void @test110(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ule <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test110:
+; AVX512BW: vpminuw {{.*}}
+}
+
+define void @test111(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ugt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test111:
+; AVX512BW: vpmaxuw {{.*}}
+}
+
+define void @test112(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp uge <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.a, <32 x i16> %load.b
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test112:
+; AVX512BW: vpmaxuw {{.*}}
+}
+
+define void @test113(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp slt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test113:
+; AVX512F: vpminsd {{.*}}
+}
+
+define void @test114(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sle <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test114:
+; AVX512F: vpminsd {{.*}}
+}
+
+define void @test115(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sgt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test115:
+; AVX512F: vpmaxsd {{.*}}
+}
+
+define void @test116(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sge <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test116:
+; AVX512F: vpmaxsd {{.*}}
+}
+
+define void @test117(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ult <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test117:
+; AVX512F: vpminud {{.*}}
+}
+
+define void @test118(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ule <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test118:
+; AVX512F: vpminud {{.*}}
+}
+
+define void @test119(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ugt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test119:
+; AVX512F: vpmaxud {{.*}}
+}
+
+define void @test120(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp uge <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.a, <16 x i32> %load.b
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test120:
+; AVX512F: vpmaxud {{.*}}
+}
+
+define void @test121(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test121:
+; AVX512F: vpminsq {{.*}}
+}
+
+define void @test122(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test122:
+; AVX512F: vpminsq {{.*}}
+}
+
+define void @test123(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test123:
+; AVX512F: vpmaxsq {{.*}}
+}
+
+define void @test124(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test124:
+; AVX512F: vpmaxsq {{.*}}
+}
+
+define void @test125(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test125:
+; AVX512F: vpminuq {{.*}}
+}
+
+define void @test126(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test126:
+; AVX512F: vpminuq {{.*}}
+}
+
+define void @test127(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test127:
+; AVX512F: vpmaxuq {{.*}}
+}
+
+define void @test128(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.a, <8 x i64> %load.b
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test128:
+; AVX512F: vpmaxuq {{.*}}
+}
+
+define void @test129(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp slt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test129:
+; AVX512BW: vpmaxsb
+}
+
+define void @test130(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sle <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test130:
+; AVX512BW: vpmaxsb
+}
+
+define void @test131(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sgt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test131:
+; AVX512BW: vpminsb
+}
+
+define void @test132(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp sge <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test132:
+; AVX512BW: vpminsb
+}
+
+define void @test133(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ult <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test133:
+; AVX512BW: vpmaxub
+}
+
+define void @test134(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ule <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test134:
+; AVX512BW: vpmaxub
+}
+
+define void @test135(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp ugt <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test135:
+; AVX512BW: vpminub
+}
+
+define void @test136(i8* nocapture %a, i8* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i8* %a, i64 %index
+  %gep.b = getelementptr inbounds i8* %b, i64 %index
+  %ptr.a = bitcast i8* %gep.a to <64 x i8>*
+  %ptr.b = bitcast i8* %gep.b to <64 x i8>*
+  %load.a = load <64 x i8>* %ptr.a, align 2
+  %load.b = load <64 x i8>* %ptr.b, align 2
+  %cmp = icmp uge <64 x i8> %load.a, %load.b
+  %sel = select <64 x i1> %cmp, <64 x i8> %load.b, <64 x i8> %load.a
+  store <64 x i8> %sel, <64 x i8>* %ptr.a, align 2
+  %index.next = add i64 %index, 32
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test136:
+; AVX512BW: vpminub
+}
+
+define void @test137(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp slt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test137:
+; AVX512BW: vpmaxsw
+}
+
+define void @test138(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sle <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test138:
+; AVX512BW: vpmaxsw
+}
+
+define void @test139(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sgt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test139:
+; AVX512BW: vpminsw
+}
+
+define void @test140(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp sge <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test140:
+; AVX512BW: vpminsw
+}
+
+define void @test141(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ult <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test141:
+; AVX512BW: vpmaxuw
+}
+
+define void @test142(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ule <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test142:
+; AVX512BW: vpmaxuw
+}
+
+define void @test143(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp ugt <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test143:
+; AVX512BW: vpminuw
+}
+
+define void @test144(i16* nocapture %a, i16* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i16* %a, i64 %index
+  %gep.b = getelementptr inbounds i16* %b, i64 %index
+  %ptr.a = bitcast i16* %gep.a to <32 x i16>*
+  %ptr.b = bitcast i16* %gep.b to <32 x i16>*
+  %load.a = load <32 x i16>* %ptr.a, align 2
+  %load.b = load <32 x i16>* %ptr.b, align 2
+  %cmp = icmp uge <32 x i16> %load.a, %load.b
+  %sel = select <32 x i1> %cmp, <32 x i16> %load.b, <32 x i16> %load.a
+  store <32 x i16> %sel, <32 x i16>* %ptr.a, align 2
+  %index.next = add i64 %index, 16
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512BW-LABEL: test144:
+; AVX512BW: vpminuw
+}
+
+define void @test145(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp slt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test145:
+; AVX512F: vpmaxsd
+}
+
+define void @test146(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sle <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test146:
+; AVX512F: vpmaxsd
+}
+
+define void @test147(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sgt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test147:
+; AVX512F: vpminsd
+}
+
+define void @test148(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp sge <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test148:
+; AVX512F: vpminsd
+}
+
+define void @test149(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ult <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test149:
+; AVX512F: vpmaxud
+}
+
+define void @test150(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ule <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test150:
+; AVX512F: vpmaxud
+}
+
+define void @test151(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp ugt <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test151:
+; AVX512F: vpminud
+}
+
+define void @test152(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <16 x i32>*
+  %ptr.b = bitcast i32* %gep.b to <16 x i32>*
+  %load.a = load <16 x i32>* %ptr.a, align 2
+  %load.b = load <16 x i32>* %ptr.b, align 2
+  %cmp = icmp uge <16 x i32> %load.a, %load.b
+  %sel = select <16 x i1> %cmp, <16 x i32> %load.b, <16 x i32> %load.a
+  store <16 x i32> %sel, <16 x i32>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test152:
+; AVX512F: vpminud
+}
+
+; -----------------------
+
+define void @test153(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test153:
+; AVX512F: vpmaxsq
+}
+
+define void @test154(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test154:
+; AVX512F: vpmaxsq
+}
+
+define void @test155(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test155:
+; AVX512F: vpminsq
+}
+
+define void @test156(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test156:
+; AVX512F: vpminsq
+}
+
+define void @test157(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test157:
+; AVX512F: vpmaxuq
+}
+
+define void @test158(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test158:
+; AVX512F: vpmaxuq
+}
+
+define void @test159(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test159:
+; AVX512F: vpminuq
+}
+
+define void @test160(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <8 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <8 x i64>*
+  %load.a = load <8 x i64>* %ptr.a, align 2
+  %load.b = load <8 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <8 x i64> %load.a, %load.b
+  %sel = select <8 x i1> %cmp, <8 x i64> %load.b, <8 x i64> %load.a
+  store <8 x i64> %sel, <8 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512F-LABEL: test160:
+; AVX512F: vpminuq
+}
+
+define void @test161(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test161:
+; AVX512VL: vpminsq
+}
+
+define void @test162(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test162:
+; AVX512VL: vpminsq
+}
+
+define void @test163(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test163:
+; AVX512VL: vpmaxsq 
+}
+
+define void @test164(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test164:
+; AVX512VL: vpmaxsq
+}
+
+define void @test165(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test165:
+; AVX512VL: vpminuq 
+}
+
+define void @test166(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test166:
+; AVX512VL: vpminuq
+}
+
+define void @test167(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test167:
+; AVX512VL: vpmaxuq
+}
+
+define void @test168(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.a, <4 x i64> %load.b
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test168:
+; AVX512VL: vpmaxuq
+}
+
+define void @test169(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test169:
+; AVX512VL: vpmaxsq
+}
+
+define void @test170(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test170:
+; AVX512VL: vpmaxsq
+}
+
+define void @test171(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test171:
+; AVX512VL: vpminsq
+}
+
+define void @test172(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test172:
+; AVX512VL: vpminsq
+}
+
+define void @test173(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test173:
+; AVX512VL: vpmaxuq
+}
+
+define void @test174(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test174:
+; AVX512VL: vpmaxuq
+}
+
+define void @test175(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test175:
+; AVX512VL: vpminuq
+}
+
+define void @test176(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <4 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <4 x i64>*
+  %load.a = load <4 x i64>* %ptr.a, align 2
+  %load.b = load <4 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <4 x i64> %load.a, %load.b
+  %sel = select <4 x i1> %cmp, <4 x i64> %load.b, <4 x i64> %load.a
+  store <4 x i64> %sel, <4 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test176:
+; AVX512VL: vpminuq
+}
+
+define void @test177(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test177:
+; AVX512VL: vpminsq
+}
+
+define void @test178(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test178:
+; AVX512VL: vpminsq
+}
+
+define void @test179(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test179:
+; AVX512VL: vpmaxsq
+}
+
+define void @test180(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test180:
+; AVX512VL: vpmaxsq
+}
+
+define void @test181(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test181:
+; AVX512VL: vpminuq
+}
+
+define void @test182(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test182:
+; AVX512VL: vpminuq
+}
+
+define void @test183(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test183:
+; AVX512VL: vpmaxuq
+}
+
+define void @test184(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.a, <2 x i64> %load.b
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test184:
+; AVX512VL: vpmaxuq
+}
+
+define void @test185(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp slt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test185:
+; AVX512VL: vpmaxsq
+}
+
+define void @test186(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sle <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test186:
+; AVX512VL: vpmaxsq
+}
+
+define void @test187(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sgt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test187:
+; AVX512VL: vpminsq
+}
+
+define void @test188(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp sge <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test188:
+; AVX512VL: vpminsq
+}
+
+define void @test189(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ult <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test189:
+; AVX512VL: vpmaxuq
+}
+
+define void @test190(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ule <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test190:
+; AVX512VL: vpmaxuq
+}
+
+define void @test191(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp ugt <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test191:
+; AVX512VL: vpminuq
+}
+
+define void @test192(i32* nocapture %a, i32* nocapture %b) nounwind {
+vector.ph:
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %gep.a = getelementptr inbounds i32* %a, i64 %index
+  %gep.b = getelementptr inbounds i32* %b, i64 %index
+  %ptr.a = bitcast i32* %gep.a to <2 x i64>*
+  %ptr.b = bitcast i32* %gep.b to <2 x i64>*
+  %load.a = load <2 x i64>* %ptr.a, align 2
+  %load.b = load <2 x i64>* %ptr.b, align 2
+  %cmp = icmp uge <2 x i64> %load.a, %load.b
+  %sel = select <2 x i1> %cmp, <2 x i64> %load.b, <2 x i64> %load.a
+  store <2 x i64> %sel, <2 x i64>* %ptr.a, align 2
+  %index.next = add i64 %index, 8
+  %loop = icmp eq i64 %index.next, 16384
+  br i1 %loop, label %for.end, label %vector.body
+
+for.end:                                          ; preds = %vector.body
+  ret void
+
+; AVX512VL-LABEL: test192:
+; AVX512VL: vpminuq
 }





More information about the llvm-commits mailing list