[llvm] [LoongArch][NFC] Pre-commit for lowering vector mask generation to `[X]VMSK{LT,GE,NE}Z` (PR #142108)

via llvm-commits llvm-commits at lists.llvm.org
Fri May 30 01:23:35 PDT 2025


https://github.com/heiher created https://github.com/llvm/llvm-project/pull/142108

None
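
For context, every test added by this patch follows the same IR shape: a lane-wise integer comparison of the input vector against a splat constant, followed by a bitcast of the resulting <N x i1> vector to an N-bit integer mask. A minimal sketch of that pattern is below; the function name is illustrative only and not part of the patch:

define i32 @example_mask_slt_zero(<32 x i8> %a) {
entry:
  ; lane-wise "is this byte negative?" test, producing one i1 per lane
  %cmp = icmp slt <32 x i8> %a, splat (i8 0)
  ; pack the 32 lane bits into a single 32-bit mask
  %mask = bitcast <32 x i1> %cmp to i32
  ret i32 %mask
}

As the autogenerated CHECK lines show, the current lowering spills the compare result and assembles the mask bit by bit with per-lane extracts, shifts, and ors; the follow-up change is expected to select a single `[X]VMSK{LT,GE,NE}Z` instruction for these shapes instead.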

From 45f4bc64327058f3cd82738f5b57615e98dd4ed8 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Fri, 30 May 2025 12:25:58 +0800
Subject: [PATCH] [LoongArch][NFC] Pre-commit for lowering vector mask
 generation to `[X]VMSK{LT,GE,NE}Z`

---
 llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll | 1406 +++++++++++++++++
 llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll   |  688 ++++++++
 2 files changed, 2094 insertions(+)
 create mode 100644 llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
 create mode 100644 llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll

diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
new file mode 100644
index 0000000000000..a934e5a549c4b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
@@ -0,0 +1,1406 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s
+
+define i32 @xmsk_eq_allzeros_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_eq_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvseqi.b $xr0, $xr0, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 17
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 16
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 18
+; CHECK-NEXT:    slli.d $a1, $a1, 17
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 19
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 18
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 19
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 21
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 20
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 22
+; CHECK-NEXT:    slli.d $a1, $a1, 21
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 23
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 22
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 23
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 25
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 24
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 26
+; CHECK-NEXT:    slli.d $a1, $a1, 25
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 27
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 26
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 27
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 29
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 28
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 30
+; CHECK-NEXT:    slli.d $a1, $a1, 29
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 30
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 31
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp eq <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @xmsk_eq_allones_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_eq_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvseqi.b $xr0, $xr0, -1
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 17
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 16
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 18
+; CHECK-NEXT:    slli.d $a1, $a1, 17
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 19
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 18
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 19
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 21
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 20
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 22
+; CHECK-NEXT:    slli.d $a1, $a1, 21
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 23
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 22
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 23
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 25
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 24
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 26
+; CHECK-NEXT:    slli.d $a1, $a1, 25
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 27
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 26
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 27
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 29
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 28
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 30
+; CHECK-NEXT:    slli.d $a1, $a1, 29
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 30
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 31
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp eq <32 x i8> %a, splat (i8 -1)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @xmsk_sgt_allones_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_sgt_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvrepli.b $xr1, -1
+; CHECK-NEXT:    xvslt.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 17
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 16
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 18
+; CHECK-NEXT:    slli.d $a1, $a1, 17
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 19
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 18
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 19
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 21
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 20
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 22
+; CHECK-NEXT:    slli.d $a1, $a1, 21
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 23
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 22
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 23
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 25
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 24
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 26
+; CHECK-NEXT:    slli.d $a1, $a1, 25
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 27
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 26
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 27
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 29
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 28
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 30
+; CHECK-NEXT:    slli.d $a1, $a1, 29
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 30
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 31
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sgt <32 x i8> %a, splat (i8 -1)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @xmsk_sge_allzeros_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_sge_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvrepli.b $xr1, 0
+; CHECK-NEXT:    xvsle.b $xr0, $xr1, $xr0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 17
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 16
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 18
+; CHECK-NEXT:    slli.d $a1, $a1, 17
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 19
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 18
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 19
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 21
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 20
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 22
+; CHECK-NEXT:    slli.d $a1, $a1, 21
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 23
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 22
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 23
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 25
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 24
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 26
+; CHECK-NEXT:    slli.d $a1, $a1, 25
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 27
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 26
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 27
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 29
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 28
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 30
+; CHECK-NEXT:    slli.d $a1, $a1, 29
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 30
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 31
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sge <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @xmsk_slt_allzeros_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_slt_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvslti.b $xr0, $xr0, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 17
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 16
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 18
+; CHECK-NEXT:    slli.d $a1, $a1, 17
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 19
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 18
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 19
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 21
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 20
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 22
+; CHECK-NEXT:    slli.d $a1, $a1, 21
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 23
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 22
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 23
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 25
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 24
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 26
+; CHECK-NEXT:    slli.d $a1, $a1, 25
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 27
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 26
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 27
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 29
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 28
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 30
+; CHECK-NEXT:    slli.d $a1, $a1, 29
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 30
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 31
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i16 @xmsk_slt_allzeros_i16(<16 x i16 > %a) {
+; CHECK-LABEL: xmsk_slt_allzeros_i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvslti.h $xr0, $xr0, 0
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.hu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.hu $a1, $sp, 18
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 8
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.hu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.hu $a1, $sp, 22
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 10
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.hu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.hu $a1, $sp, 26
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 12
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.hu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.h $a1, $sp, 30
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 14
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <16 x i16> %a, splat (i16 0)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i8 @xmsk_slt_allzeros_i32(<8 x i32 > %a) {
+; CHECK-LABEL: xmsk_slt_allzeros_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    xvslti.w $xr0, $xr0, 0
+; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 0
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 7
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <8 x i32> %a, splat (i32 0)
+  %2 = bitcast <8 x i1> %1 to i8
+  ret i8 %2
+}
+
+define i4 @xmsk_slt_allzeros_i64(<4 x i64 > %a) {
+; CHECK-LABEL: xmsk_slt_allzeros_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    xvslti.d $xr0, $xr0, 0
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 0
+; CHECK-NEXT:    xvpickve2gr.d $a1, $xr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 1
+; CHECK-NEXT:    sub.d $a0, $a1, $a0
+; CHECK-NEXT:    xvpickve2gr.d $a1, $xr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    xvpickve2gr.d $a1, $xr0, 3
+; CHECK-NEXT:    slli.d $a1, $a1, 3
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    andi $a0, $a0, 15
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <4 x i64> %a, splat (i64 0)
+  %2 = bitcast <4 x i1> %1 to i4
+  ret i4 %2
+}
+
+define i32 @xmsk_sle_allones_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_sle_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvslei.b $xr0, $xr0, -1
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 17
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 16
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 18
+; CHECK-NEXT:    slli.d $a1, $a1, 17
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 19
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 18
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 19
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 21
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 20
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 22
+; CHECK-NEXT:    slli.d $a1, $a1, 21
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 23
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 22
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 23
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 25
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 24
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 26
+; CHECK-NEXT:    slli.d $a1, $a1, 25
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 27
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 26
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 27
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 29
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 28
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 30
+; CHECK-NEXT:    slli.d $a1, $a1, 29
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 30
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 31
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <32 x i8> %a, splat (i8 -1)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i16 @xmsk_sle_allones_i32(<16 x i16 > %a) {
+; CHECK-LABEL: xmsk_sle_allones_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvslei.h $xr0, $xr0, -1
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.hu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.hu $a1, $sp, 18
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 8
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.hu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.hu $a1, $sp, 22
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 10
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.hu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.hu $a1, $sp, 26
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 12
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.hu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.h $a1, $sp, 30
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 14
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <16 x i16> %a, splat (i16 -1)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i8 @xmsk_sle_allones_i16(<8 x i32 > %a) {
+; CHECK-LABEL: xmsk_sle_allones_i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    xvslei.w $xr0, $xr0, -1
+; CHECK-NEXT:    xvpickve2gr.w $a0, $xr0, 0
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    xvpickve2gr.w $a1, $xr0, 7
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <8 x i32> %a, splat (i32 -1)
+  %2 = bitcast <8 x i1> %1 to i8
+  ret i8 %2
+}
+
+define i4 @xmsk_sle_allones_i64(<4 x i64 > %a) {
+; CHECK-LABEL: xmsk_sle_allones_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    xvslei.d $xr0, $xr0, -1
+; CHECK-NEXT:    xvpickve2gr.d $a0, $xr0, 0
+; CHECK-NEXT:    xvpickve2gr.d $a1, $xr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 1
+; CHECK-NEXT:    sub.d $a0, $a1, $a0
+; CHECK-NEXT:    xvpickve2gr.d $a1, $xr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    xvpickve2gr.d $a1, $xr0, 3
+; CHECK-NEXT:    slli.d $a1, $a1, 3
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    andi $a0, $a0, 15
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <4 x i64> %a, splat (i64 -1)
+  %2 = bitcast <4 x i1> %1 to i4
+  ret i4 %2
+}
+
+define i32 @xmsk_ne_allzeros_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_ne_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvseqi.b $xr0, $xr0, 0
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 17
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 16
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 18
+; CHECK-NEXT:    slli.d $a1, $a1, 17
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 19
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 18
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 19
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 21
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 20
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 22
+; CHECK-NEXT:    slli.d $a1, $a1, 21
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 23
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 22
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 23
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 25
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 24
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 26
+; CHECK-NEXT:    slli.d $a1, $a1, 25
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 27
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 26
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 27
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 29
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 28
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 30
+; CHECK-NEXT:    slli.d $a1, $a1, 29
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 30
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 31
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp ne <32 x i8> %a, splat (i8 0)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
+
+define i32 @xmsk_ne_allones_i8(<32 x i8 > %a) {
+; CHECK-LABEL: xmsk_ne_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    st.d $ra, $sp, 56 # 8-byte Folded Spill
+; CHECK-NEXT:    st.d $fp, $sp, 48 # 8-byte Folded Spill
+; CHECK-NEXT:    .cfi_offset 1, -8
+; CHECK-NEXT:    .cfi_offset 22, -16
+; CHECK-NEXT:    addi.d $fp, $sp, 64
+; CHECK-NEXT:    .cfi_def_cfa 22, 0
+; CHECK-NEXT:    bstrins.d $sp, $zero, 4, 0
+; CHECK-NEXT:    xvseqi.b $xr0, $xr0, -1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
+; CHECK-NEXT:    xvst $xr0, $sp, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 16
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 17
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 16
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 18
+; CHECK-NEXT:    slli.d $a1, $a1, 17
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 19
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 18
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 20
+; CHECK-NEXT:    slli.d $a1, $a1, 19
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 21
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 20
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 22
+; CHECK-NEXT:    slli.d $a1, $a1, 21
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 23
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 22
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 24
+; CHECK-NEXT:    slli.d $a1, $a1, 23
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 25
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 24
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 26
+; CHECK-NEXT:    slli.d $a1, $a1, 25
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 27
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 26
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 28
+; CHECK-NEXT:    slli.d $a1, $a1, 27
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.bu $a1, $sp, 29
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 28
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    ld.bu $a2, $sp, 30
+; CHECK-NEXT:    slli.d $a1, $a1, 29
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    ld.b $a1, $sp, 31
+; CHECK-NEXT:    andi $a2, $a2, 1
+; CHECK-NEXT:    slli.d $a2, $a2, 30
+; CHECK-NEXT:    or $a0, $a0, $a2
+; CHECK-NEXT:    slli.d $a1, $a1, 31
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 31, 0
+; CHECK-NEXT:    addi.d $sp, $fp, -64
+; CHECK-NEXT:    ld.d $fp, $sp, 48 # 8-byte Folded Reload
+; CHECK-NEXT:    ld.d $ra, $sp, 56 # 8-byte Folded Reload
+; CHECK-NEXT:    addi.d $sp, $sp, 64
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp ne <32 x i8> %a, splat (i8 -1)
+  %2 = bitcast <32 x i1> %1 to i32
+  ret i32 %2
+}
diff --git a/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
new file mode 100644
index 0000000000000..bff1d3198d37b
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lsx/vmskcond.ll
@@ -0,0 +1,688 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch64 --mattr=+lsx < %s | FileCheck %s
+
+define i16 @vmsk_eq_allzeros_i8(<16 x i8 > %a) {
+; CHECK-LABEL: vmsk_eq_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vseqi.b $vr0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp eq <16 x i8> %a, splat (i8 0)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i16 @vmsk_eq_allones_i8(<16 x i8 > %a) {
+; CHECK-LABEL: vmsk_eq_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vseqi.b $vr0, $vr0, -1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp eq <16 x i8> %a, splat (i8 -1)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i16 @vmsk_sgt_allones_i8(<16 x i8 > %a) {
+; CHECK-LABEL: vmsk_sgt_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vrepli.b $vr1, -1
+; CHECK-NEXT:    vslt.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sgt <16 x i8> %a, splat (i8 -1)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i16 @vmsk_sge_allzeros_i8(<16 x i8 > %a) {
+; CHECK-LABEL: vmsk_sge_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vrepli.b $vr1, 0
+; CHECK-NEXT:    vsle.b $vr0, $vr1, $vr0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sge <16 x i8> %a, splat (i8 0)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i16 @vmsk_slt_allzeros_i8(<16 x i8 > %a) {
+; CHECK-LABEL: vmsk_slt_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vslti.b $vr0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <16 x i8> %a, splat (i8 0)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i8 @vmsk_slt_allzeros_i16(<8 x i16 > %a) {
+; CHECK-LABEL: vmsk_slt_allzeros_i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vslti.h $vr0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <8 x i16> %a, splat (i16 0)
+  %2 = bitcast <8 x i1> %1 to i8
+  ret i8 %2
+}
+
+define i4 @vmsk_slt_allzeros_i32(<4 x i32 > %a) {
+; CHECK-LABEL: vmsk_slt_allzeros_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vslti.w $vr0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.w $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.w $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.w $a1, $vr0, 3
+; CHECK-NEXT:    slli.d $a1, $a1, 3
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    andi $a0, $a0, 15
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <4 x i32> %a, splat (i32 0)
+  %2 = bitcast <4 x i1> %1 to i4
+  ret i4 %2
+}
+
+define i2 @vmsk_slt_allzeros_i64(<2 x i64 > %a) {
+; CHECK-LABEL: vmsk_slt_allzeros_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vslti.d $vr0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.d $a1, $vr0, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 1
+; CHECK-NEXT:    sub.d $a0, $a1, $a0
+; CHECK-NEXT:    andi $a0, $a0, 3
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp slt <2 x i64> %a, splat (i64 0)
+  %2 = bitcast <2 x i1> %1 to i2
+  ret i2 %2
+}
+
+define i16 @vmsk_sle_allones_i8(<16 x i8 > %a) {
+; CHECK-LABEL: vmsk_sle_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vslei.b $vr0, $vr0, -1
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <16 x i8> %a, splat (i8 -1)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i8 @vmsk_sle_allones_i16(<8 x i16 > %a) {
+; CHECK-LABEL: vmsk_sle_allones_i16:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vslei.h $vr0, $vr0, -1
+; CHECK-NEXT:    vpickve2gr.h $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    andi $a0, $a0, 255
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <8 x i16> %a, splat (i16 -1)
+  %2 = bitcast <8 x i1> %1 to i8
+  ret i8 %2
+}
+
+define i4 @vmsk_sle_allones_i32(<4 x i32 > %a) {
+; CHECK-LABEL: vmsk_sle_allones_i32:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vslei.w $vr0, $vr0, -1
+; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.w $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.w $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.w $a1, $vr0, 3
+; CHECK-NEXT:    slli.d $a1, $a1, 3
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    andi $a0, $a0, 15
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <4 x i32> %a, splat (i32 -1)
+  %2 = bitcast <4 x i1> %1 to i4
+  ret i4 %2
+}
+
+define i2 @vmsk_sle_allones_i64(<2 x i64 > %a) {
+; CHECK-LABEL: vmsk_sle_allones_i64:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vslei.d $vr0, $vr0, -1
+; CHECK-NEXT:    vpickve2gr.d $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.d $a1, $vr0, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 1
+; CHECK-NEXT:    sub.d $a0, $a1, $a0
+; CHECK-NEXT:    andi $a0, $a0, 3
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp sle <2 x i64> %a, splat (i64 -1)
+  %2 = bitcast <2 x i1> %1 to i2
+  ret i2 %2
+}
+
+define i16 @vmsk_ne_allzeros_i8(<16 x i8 > %a) {
+; CHECK-LABEL: vmsk_ne_allzeros_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vseqi.b $vr0, $vr0, 0
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp ne <16 x i8> %a, splat (i8 0)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}
+
+define i16 @vmsk_ne_allones_i8(<16 x i8 > %a) {
+; CHECK-LABEL: vmsk_ne_allones_i8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    addi.d $sp, $sp, -16
+; CHECK-NEXT:    .cfi_def_cfa_offset 16
+; CHECK-NEXT:    vseqi.b $vr0, $vr0, -1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
+; CHECK-NEXT:    vpickve2gr.b $a0, $vr0, 0
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    bstrins.d $a0, $a1, 63, 1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT:    bstrins.d $a0, $a1, 2, 2
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT:    bstrins.d $a0, $a1, 3, 3
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT:    bstrins.d $a0, $a1, 4, 4
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT:    bstrins.d $a0, $a1, 5, 5
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 6
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 7
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 8
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 9
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 10
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 11
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 12
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 13
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT:    andi $a1, $a1, 1
+; CHECK-NEXT:    slli.d $a1, $a1, 14
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT:    slli.d $a1, $a1, 15
+; CHECK-NEXT:    or $a0, $a0, $a1
+; CHECK-NEXT:    bstrpick.d $a0, $a0, 15, 0
+; CHECK-NEXT:    addi.d $sp, $sp, 16
+; CHECK-NEXT:    ret
+entry:
+  %1 = icmp ne <16 x i8> %a, splat (i8 -1)
+  %2 = bitcast <16 x i1> %1 to i16
+  ret i16 %2
+}


