[llvm] [LoongArch][NFC] Pre-commit tests for vxi1-masks in lasx (PR #163490)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Oct 14 20:55:07 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-loongarch
Author: Zhaoxin Yang (ylzsx)
<details>
<summary>Changes</summary>
---
Patch is 39.44 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/163490.diff
1 Files Affected:
- (added) llvm/test/CodeGen/LoongArch/lasx/vxi1-masks.ll (+1026)
``````````diff
diff --git a/llvm/test/CodeGen/LoongArch/lasx/vxi1-masks.ll b/llvm/test/CodeGen/LoongArch/lasx/vxi1-masks.ll
new file mode 100644
index 0000000000000..cd98ba7e4083c
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/lasx/vxi1-masks.ll
@@ -0,0 +1,1026 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
+; RUN: llc --mtriple=loongarch32 --mattr=+32s,+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA32
+; RUN: llc --mtriple=loongarch64 --mattr=+lasx < %s | FileCheck %s --check-prefixes=CHECK,LA64
+
+; xor_zext_masks_v4i64: load two <4 x double> vectors from %a and %b, compare
+; them lane-wise with fcmp olt, xor the <4 x i1> result with the constant mask
+; <1,0,1,0>, zero-extend to <4 x i64>, and store the result to %res.
+; The expected output is checked under separate LA32 and LA64 prefixes.
+define void @xor_zext_masks_v4i64(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xor_zext_masks_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
+; LA32-NEXT: vldi $vr0, -1777
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vextrins.w $vr1, $vr0, 2
+; LA32-NEXT: vextrins.w $vr1, $vr0, 35
+; LA32-NEXT: vextrins.w $vr0, $vr0, 33
+; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
+; LA32-NEXT: xvrepli.d $xr1, 1
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xor_zext_masks_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
+; LA64-NEXT: vldi $vr0, -1777
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vpickve2gr.w $a1, $vr0, 2
+; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 0
+; LA64-NEXT: vpickve2gr.w $a1, $vr0, 3
+; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
+; LA64-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
+; LA64-NEXT: vpickve2gr.w $a1, $vr0, 1
+; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvrepli.d $xr0, 1
+; LA64-NEXT: xvand.v $xr0, $xr2, $xr0
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x double>, ptr %a
+ %v1 = load <4 x double>, ptr %b
+ %m0 = fcmp olt <4 x double> %v0, %v1
+ ; The constant mask inverts lanes 0 and 2 and leaves lanes 1 and 3 unchanged.
+ %mxor = xor <4 x i1> %m0, <i1 1, i1 0, i1 1, i1 0>
+ %r = zext <4 x i1> %mxor to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+; xor_zext_masks_v8i32: load two <8 x float> vectors from %a and %b, compare
+; them lane-wise with fcmp olt, xor the <8 x i1> result with the alternating
+; constant mask <1,0,1,0,1,0,1,0>, zero-extend to <8 x i32>, and store to %res.
+; The expected output is checked under the shared CHECK prefix.
+define void @xor_zext_masks_v8i32(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xor_zext_masks_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1
+; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
+; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 2
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
+; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 3
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
+; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 4
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
+; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 5
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
+; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 6
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
+; CHECK-NEXT: xvpickve2gr.w $a1, $xr0, 7
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
+; CHECK-NEXT: vldi $vr0, -2305
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 1
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 2
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT: vinsgr2vr.w $vr1, $a1, 3
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 0
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 0
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 1
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 2
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3
+; CHECK-NEXT: vinsgr2vr.w $vr2, $a1, 3
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT: xvrepli.w $xr0, 1
+; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <8 x float>, ptr %a
+ %v1 = load <8 x float>, ptr %b
+ %m0 = fcmp olt <8 x float> %v0, %v1
+ ; The constant mask inverts the even lanes and leaves the odd lanes unchanged.
+ %mxor = xor <8 x i1> %m0, <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>
+ %r = zext <8 x i1> %mxor to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+; xor_zext_masks_v16i16: load two <16 x i16> vectors from %a and %b, compare
+; them lane-wise with icmp eq, xor the <16 x i1> result with the alternating
+; constant mask <1,0,1,0,...>, zero-extend to <16 x i16>, and store to %res.
+; The expected output is checked under the shared CHECK prefix.
+define void @xor_zext_masks_v16i16(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xor_zext_masks_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvseq.h $xr0, $xr0, $xr1
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 0
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 1
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 3
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 5
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 7
+; CHECK-NEXT: xvpermi.d $xr0, $xr0, 14
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 0
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 1
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 9
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 3
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 11
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 5
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 13
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 15
+; CHECK-NEXT: vrepli.h $vr0, 255
+; CHECK-NEXT: vxor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 8
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 0
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 9
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 1
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 10
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 2
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 11
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 3
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 12
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 4
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 13
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 5
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 14
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 6
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 15
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a1, 7
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 0
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 0
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 1
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 1
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 2
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 2
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 3
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 3
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 4
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 4
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 5
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 5
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 6
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 6
+; CHECK-NEXT: vpickve2gr.b $a1, $vr0, 7
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 7
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT: xvrepli.h $xr0, 1
+; CHECK-NEXT: xvand.v $xr0, $xr2, $xr0
+; CHECK-NEXT: xvst $xr0, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a
+ %v1 = load <16 x i16>, ptr %b
+ %m0 = icmp eq <16 x i16> %v0, %v1
+ ; The constant mask inverts the even lanes and leaves the odd lanes unchanged.
+ %mxor = xor <16 x i1> %m0, <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
+ i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>
+ %r = zext <16 x i1> %mxor to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+; xor_sext_masks_v4i64: load two <4 x double> vectors from %a and %b, compare
+; them lane-wise with fcmp olt, xor the <4 x i1> result with the constant mask
+; <1,0,1,0>, sign-extend to <4 x i64>, and store the result to %res.
+; The expected output is checked under separate LA32 and LA64 prefixes.
+define void @xor_sext_masks_v4i64(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xor_sext_masks_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
+; LA32-NEXT: vldi $vr0, -1777
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vori.b $vr1, $vr0, 0
+; LA32-NEXT: vextrins.w $vr1, $vr1, 16
+; LA32-NEXT: vextrins.w $vr1, $vr0, 33
+; LA32-NEXT: vextrins.w $vr1, $vr0, 49
+; LA32-NEXT: vextrins.w $vr2, $vr0, 2
+; LA32-NEXT: vextrins.w $vr2, $vr0, 18
+; LA32-NEXT: vextrins.w $vr2, $vr0, 35
+; LA32-NEXT: vextrins.w $vr2, $vr0, 51
+; LA32-NEXT: xvpermi.q $xr1, $xr2, 2
+; LA32-NEXT: xvst $xr1, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xor_sext_masks_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
+; LA64-NEXT: xvpickve2gr.d $a2, $xr0, 3
+; LA64-NEXT: vrepli.b $vr0, -1
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vpickve2gr.w $a3, $vr0, 2
+; LA64-NEXT: vinsgr2vr.d $vr1, $a3, 0
+; LA64-NEXT: vinsgr2vr.d $vr1, $a2, 1
+; LA64-NEXT: vpickve2gr.w $a2, $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a2, 0
+; LA64-NEXT: vinsgr2vr.d $vr0, $a1, 1
+; LA64-NEXT: xvpermi.q $xr0, $xr1, 2
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x double>, ptr %a
+ %v1 = load <4 x double>, ptr %b
+ %m0 = fcmp olt <4 x double> %v0, %v1
+ ; The constant mask inverts lanes 0 and 2 and leaves lanes 1 and 3 unchanged.
+ %mxor = xor <4 x i1> %m0, <i1 1, i1 0, i1 1, i1 0>
+ %r = sext <4 x i1> %mxor to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+; xor_sext_masks_v8i32: load two <8 x float> vectors from %a and %b, compare
+; them lane-wise with fcmp olt, xor the <8 x i1> result with the alternating
+; constant mask <1,0,1,0,1,0,1,0>, sign-extend to <8 x i32>, and store to %res.
+; The expected output is checked under separate LA32 and LA64 prefixes.
+define void @xor_sext_masks_v8i32(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: xor_sext_masks_v8i32:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
+; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 0
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
+; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 2
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
+; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 4
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
+; LA32-NEXT: vinsgr2vr.h $vr1, $a1, 6
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 3
+; LA32-NEXT: xvpickve2gr.w $a2, $xr0, 1
+; LA32-NEXT: xvpickve2gr.w $a3, $xr0, 7
+; LA32-NEXT: xvpickve2gr.w $a4, $xr0, 5
+; LA32-NEXT: vrepli.b $vr0, -1
+; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vpickve2gr.h $a5, $vr0, 4
+; LA32-NEXT: ext.w.h $a5, $a5
+; LA32-NEXT: vinsgr2vr.w $vr1, $a5, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 1
+; LA32-NEXT: vpickve2gr.h $a4, $vr0, 6
+; LA32-NEXT: ext.w.h $a4, $a4
+; LA32-NEXT: vinsgr2vr.w $vr1, $a4, 2
+; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 3
+; LA32-NEXT: vpickve2gr.h $a3, $vr0, 0
+; LA32-NEXT: ext.w.h $a3, $a3
+; LA32-NEXT: vinsgr2vr.w $vr2, $a3, 0
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 1
+; LA32-NEXT: vpickve2gr.h $a2, $vr0, 2
+; LA32-NEXT: ext.w.h $a2, $a2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a2, 2
+; LA32-NEXT: vinsgr2vr.w $vr2, $a1, 3
+; LA32-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA32-NEXT: xvst $xr2, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: xor_sext_masks_v8i32:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvfcmp.clt.s $xr0, $xr0, $xr1
+; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 0
+; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 0
+; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 2
+; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 2
+; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 4
+; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 4
+; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 6
+; LA64-NEXT: vinsgr2vr.h $vr1, $a1, 6
+; LA64-NEXT: xvpickve2gr.w $a1, $xr0, 3
+; LA64-NEXT: xvpickve2gr.w $a2, $xr0, 1
+; LA64-NEXT: xvpickve2gr.w $a3, $xr0, 7
+; LA64-NEXT: xvpickve2gr.w $a4, $xr0, 5
+; LA64-NEXT: vrepli.b $vr0, -1
+; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vpickve2gr.h $a5, $vr0, 4
+; LA64-NEXT: ext.w.h $a5, $a5
+; LA64-NEXT: vinsgr2vr.w $vr1, $a5, 0
+; LA64-NEXT: ext.w.h $a4, $a4
+; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 1
+; LA64-NEXT: vpickve2gr.h $a4, $vr0, 6
+; LA64-NEXT: ext.w.h $a4, $a4
+; LA64-NEXT: vinsgr2vr.w $vr1, $a4, 2
+; LA64-NEXT: ext.w.h $a3, $a3
+; LA64-NEXT: vinsgr2vr.w $vr1, $a3, 3
+; LA64-NEXT: vpickve2gr.h $a3, $vr0, 0
+; LA64-NEXT: ext.w.h $a3, $a3
+; LA64-NEXT: vinsgr2vr.w $vr2, $a3, 0
+; LA64-NEXT: ext.w.h $a2, $a2
+; LA64-NEXT: vinsgr2vr.w $vr2, $a2, 1
+; LA64-NEXT: vpickve2gr.h $a2, $vr0, 2
+; LA64-NEXT: ext.w.h $a2, $a2
+; LA64-NEXT: vinsgr2vr.w $vr2, $a2, 2
+; LA64-NEXT: ext.w.h $a1, $a1
+; LA64-NEXT: vinsgr2vr.w $vr2, $a1, 3
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvst $xr2, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <8 x float>, ptr %a
+ %v1 = load <8 x float>, ptr %b
+ %m0 = fcmp olt <8 x float> %v0, %v1
+ ; The constant mask inverts the even lanes and leaves the odd lanes unchanged.
+ %mxor = xor <8 x i1> %m0, <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>
+ %r = sext <8 x i1> %mxor to <8 x i32>
+ store <8 x i32> %r, ptr %res
+ ret void
+}
+
+; xor_sext_masks_v16i16: load two <16 x i16> vectors from %a and %b, compare
+; them lane-wise with icmp eq, xor the <16 x i1> result with the alternating
+; constant mask <1,0,1,0,...>, sign-extend to <16 x i16>, and store to %res.
+; The expected output is checked under the shared CHECK prefix.
+define void @xor_sext_masks_v16i16(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: xor_sext_masks_v16i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT: xvld $xr1, $a2, 0
+; CHECK-NEXT: xvseq.h $xr0, $xr0, $xr1
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 0
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 0
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 2
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 2
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 4
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 4
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 6
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 6
+; CHECK-NEXT: xvpermi.d $xr2, $xr0, 14
+; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 0
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 8
+; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 2
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 10
+; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 4
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 12
+; CHECK-NEXT: vpickve2gr.h $a1, $vr2, 6
+; CHECK-NEXT: vinsgr2vr.b $vr1, $a1, 14
+; CHECK-NEXT: vpickve2gr.h $a1, $vr0, 7
+; CHECK-NEXT: vpickve2gr.h $a2, $vr0, 5
+; CHECK-NEXT: vpickve2gr.h $a3, $vr0, 3
+; CHECK-NEXT: vpickve2gr.h $a4, $vr0, 1
+; CHECK-NEXT: vpickve2gr.h $a5, $vr2, 7
+; CHECK-NEXT: vpickve2gr.h $a6, $vr2, 5
+; CHECK-NEXT: vpickve2gr.h $a7, $vr2, 3
+; CHECK-NEXT: vpickve2gr.h $t0, $vr2, 1
+; CHECK-NEXT: vxori.b $vr0, $vr1, 255
+; CHECK-NEXT: vpickve2gr.b $t1, $vr0, 8
+; CHECK-NEXT: ext.w.b $t1, $t1
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t1, 0
+; CHECK-NEXT: ext.w.b $t0, $t0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t0, 1
+; CHECK-NEXT: vpickve2gr.b $t0, $vr0, 10
+; CHECK-NEXT: ext.w.b $t0, $t0
+; CHECK-NEXT: vinsgr2vr.h $vr1, $t0, 2
+; CHECK-NEXT: ext.w.b $a7, $a7
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a7, 3
+; CHECK-NEXT: vpickve2gr.b $a7, $vr0, 12
+; CHECK-NEXT: ext.w.b $a7, $a7
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a7, 4
+; CHECK-NEXT: ext.w.b $a6, $a6
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a6, 5
+; CHECK-NEXT: vpickve2gr.b $a6, $vr0, 14
+; CHECK-NEXT: ext.w.b $a6, $a6
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a6, 6
+; CHECK-NEXT: ext.w.b $a5, $a5
+; CHECK-NEXT: vinsgr2vr.h $vr1, $a5, 7
+; CHECK-NEXT: vpickve2gr.b $a5, $vr0, 0
+; CHECK-NEXT: ext.w.b $a5, $a5
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a5, 0
+; CHECK-NEXT: ext.w.b $a4, $a4
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a4, 1
+; CHECK-NEXT: vpickve2gr.b $a4, $vr0, 2
+; CHECK-NEXT: ext.w.b $a4, $a4
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a4, 2
+; CHECK-NEXT: ext.w.b $a3, $a3
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a3, 3
+; CHECK-NEXT: vpickve2gr.b $a3, $vr0, 4
+; CHECK-NEXT: ext.w.b $a3, $a3
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a3, 4
+; CHECK-NEXT: ext.w.b $a2, $a2
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a2, 5
+; CHECK-NEXT: vpickve2gr.b $a2, $vr0, 6
+; CHECK-NEXT: ext.w.b $a2, $a2
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a2, 6
+; CHECK-NEXT: ext.w.b $a1, $a1
+; CHECK-NEXT: vinsgr2vr.h $vr2, $a1, 7
+; CHECK-NEXT: xvpermi.q $xr2, $xr1, 2
+; CHECK-NEXT: xvst $xr2, $a0, 0
+; CHECK-NEXT: ret
+ %v0 = load <16 x i16>, ptr %a
+ %v1 = load <16 x i16>, ptr %b
+ %m0 = icmp eq <16 x i16> %v0, %v1
+ ; The constant mask inverts the even lanes and leaves the odd lanes unchanged.
+ %mxor = xor <16 x i1> %m0, <i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0,
+ i1 1, i1 0, i1 1, i1 0, i1 1, i1 0, i1 1, i1 0>
+ %r = sext <16 x i1> %mxor to <16 x i16>
+ store <16 x i16> %r, ptr %res
+ ret void
+}
+
+; or_zext_masks_v4i64: load two <4 x double> vectors from %a and %b, compare
+; them lane-wise with fcmp olt, or the <4 x i1> result with the constant mask
+; <1,0,1,0>, zero-extend to <4 x i64>, and store the result to %res.
+; The expected output is checked under separate LA32 and LA64 prefixes.
+define void @or_zext_masks_v4i64(ptr %res, ptr %a, ptr %b) nounwind {
+; LA32-LABEL: or_zext_masks_v4i64:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvld $xr1, $a2, 0
+; LA32-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 2
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 1
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 4
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 2
+; LA32-NEXT: xvpickve2gr.w $a1, $xr0, 6
+; LA32-NEXT: vinsgr2vr.w $vr1, $a1, 3
+; LA32-NEXT: vldi $vr0, -1777
+; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vextrins.w $vr1, $vr0, 2
+; LA32-NEXT: vextrins.w $vr1, $vr0, 35
+; LA32-NEXT: vextrins.w $vr0, $vr0, 33
+; LA32-NEXT: xvpermi.q $xr0, $xr1, 2
+; LA32-NEXT: xvrepli.d $xr1, 1
+; LA32-NEXT: xvand.v $xr0, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: or_zext_masks_v4i64:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvld $xr1, $a2, 0
+; LA64-NEXT: xvfcmp.clt.d $xr0, $xr0, $xr1
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 0
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 0
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 1
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 1
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 2
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 2
+; LA64-NEXT: xvpickve2gr.d $a1, $xr0, 3
+; LA64-NEXT: vinsgr2vr.w $vr1, $a1, 3
+; LA64-NEXT: vldi $vr0, -1777
+; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vpickve2gr.w $a1, $vr0, 2
+; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 0
+; LA64-NEXT: vpickve2gr.w $a1, $vr0, 3
+; LA64-NEXT: vinsgr2vr.d $vr1, $a1, 1
+; LA64-NEXT: vpickve2gr.w $a1, $vr0, 0
+; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 0
+; LA64-NEXT: vpickve2gr.w $a1, $vr0, 1
+; LA64-NEXT: vinsgr2vr.d $vr2, $a1, 1
+; LA64-NEXT: xvpermi.q $xr2, $xr1, 2
+; LA64-NEXT: xvrepli.d $xr0, 1
+; LA64-NEXT: xvand.v $xr0, $xr2, $xr0
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
+ %v0 = load <4 x double>, ptr %a
+ %v1 = load <4 x double>, ptr %b
+ %m0 = fcmp olt <4 x double> %v0, %v1
+ ; The constant mask forces lanes 0 and 2 to true and leaves lanes 1 and 3 unchanged.
+ %mor = or <4 x i1> %m0, <i1 1, i1 0, i1 1, i1 0>
+ %r = zext <4 x i1> %mor to <4 x i64>
+ store <4 x i64> %r, ptr %res
+ ret void
+}
+
+define void @or_zext_masks_v8i32(ptr %res, ptr %a, ptr %b) nounwind {
+; CHECK-LABEL: or_zext_masks_v8i32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: xvld $xr0, $a1, 0
+; CHECK-NEXT:...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/163490
More information about the llvm-commits mailing list