[llvm] a5bef98 - [PowerPC][NFC] Add additional vector_shuffle tests involving scalar_to_vector.
Amy Kwan via llvm-commits
llvm-commits at lists.llvm.org
Mon Aug 15 10:31:13 PDT 2022
Author: Amy Kwan
Date: 2022-08-15T12:30:58-05:00
New Revision: a5bef98c751303b03fe6cfe56bead25f705b9381
URL: https://github.com/llvm/llvm-project/commit/a5bef98c751303b03fe6cfe56bead25f705b9381
DIFF: https://github.com/llvm/llvm-project/commit/a5bef98c751303b03fe6cfe56bead25f705b9381.diff
LOG: [PowerPC][NFC] Add additional vector_shuffle tests involving scalar_to_vector.
This patch adds additional test cases involving vector_shuffles whose left input,
right input, or both inputs are scalar_to_vector nodes. These test cases involve
v16i8, v2i64, v4i32 and v8i16 vector shuffles, and were generated in preparation
for D130487.
Differential Revision: https://reviews.llvm.org/D130485
Added:
llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
Modified:
Removed:
llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll
################################################################################
diff --git a/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll
deleted file mode 100644
index aa7484af44700..0000000000000
--- a/llvm/test/CodeGen/PowerPC/scalar_to_vector_shuffle.ll
+++ /dev/null
@@ -1,138 +0,0 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
-; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
-; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
-; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
-; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
-
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
-; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
-; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
-; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
-; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
-; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
-; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
-; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
-
-define <16 x i8> @test_4_8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr {
-; CHECK-LE-P8-LABEL: test_4_8:
-; CHECK-LE-P8: # %bb.0: # %entry
-; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha
-; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P8-NEXT: lfdx f1, 0, r4
-; CHECK-LE-P8-NEXT: addi r3, r5, .LCPI0_0@toc@l
-; CHECK-LE-P8-NEXT: lxvd2x vs2, 0, r3
-; CHECK-LE-P8-NEXT: xxswapd v2, f0
-; CHECK-LE-P8-NEXT: xxswapd v3, f1
-; CHECK-LE-P8-NEXT: xxswapd v4, vs2
-; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
-; CHECK-LE-P8-NEXT: blr
-;
-; CHECK-LE-P9-LABEL: test_4_8:
-; CHECK-LE-P9: # %bb.0: # %entry
-; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-LE-P9-NEXT: xxswapd v2, f0
-; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
-; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
-; CHECK-LE-P9-NEXT: xxswapd v3, f0
-; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
-; CHECK-LE-P9-NEXT: blr
-;
-; CHECK-BE-P8-LABEL: test_4_8:
-; CHECK-BE-P8: # %bb.0: # %entry
-; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha
-; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4
-; CHECK-BE-P8-NEXT: addi r3, r5, .LCPI0_0@toc@l
-; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
-; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1
-; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
-; CHECK-BE-P8-NEXT: blr
-;
-; CHECK-BE-P9-LABEL: test_4_8:
-; CHECK-BE-P9: # %bb.0: # %entry
-; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
-; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
-; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
-; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
-; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1
-; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
-; CHECK-BE-P9-NEXT: blr
-;
-; CHECK-AIX-64-P8-LABEL: test_4_8:
-; CHECK-AIX-64-P8: # %bb.0: # %entry
-; CHECK-AIX-64-P8-NEXT: ld r5, L..C0(r2) # %const.0
-; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
-; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
-; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5
-; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
-; CHECK-AIX-64-P8-NEXT: blr
-;
-; CHECK-AIX-64-P9-LABEL: test_4_8:
-; CHECK-AIX-64-P9: # %bb.0: # %entry
-; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
-; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0
-; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
-; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1
-; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
-; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
-; CHECK-AIX-64-P9-NEXT: blr
-;
-; CHECK-AIX-32-P8-LABEL: test_4_8:
-; CHECK-AIX-32-P8: # %bb.0: # %entry
-; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r4)
-; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1)
-; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
-; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
-; CHECK-AIX-32-P8-NEXT: lwz r3, L..C0(r2) # %const.0
-; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
-; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
-; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0
-; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
-; CHECK-AIX-32-P8-NEXT: blr
-;
-; CHECK-AIX-32-P9-LABEL: test_4_8:
-; CHECK-AIX-32-P9: # %bb.0: # %entry
-; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
-; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4)
-; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
-; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
-; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0
-; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
-; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
-; CHECK-AIX-32-P9-NEXT: xxmrghw v3, vs1, vs0
-; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
-; CHECK-AIX-32-P9-NEXT: blr
-entry:
- %0 = load <4 x i8>, ptr %a, align 4
- %bc1 = bitcast <4 x i8> %0 to i32
- %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0
- %1 = load <8 x i8>, ptr %b, align 8
- %bc2 = bitcast <8 x i8> %1 to i64
- %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0
- %2 = bitcast <4 x i32> %vecinit3 to <16 x i8>
- %3 = bitcast <2 x i64> %vecinit6 to <16 x i8>
- %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
- ret <16 x i8> %shuffle
-}
diff --git a/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
new file mode 100644
index 0000000000000..8db66f37a97b9
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/v16i8_scalar_to_vector_shuffle.ll
@@ -0,0 +1,2090 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+define <16 x i8> @test_v16i8_v16i8(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; v16i8 shuffle whose two inputs are each one loaded byte placed in lane 0 of a v16i8
+; CHECK-LE-P8-LABEL: test_v16i8_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lbz r3, 0(r3)
+; CHECK-LE-P8-NEXT: lbz r4, 0(r4)
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: mtvsrd v3, r4
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P8-NEXT: lbz r4, 0(r4)
+; CHECK-BE-P8-NEXT: lbz r3, 0(r3)
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI0_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT: vperm v2, v4, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: addis r5, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-BE-P9-NEXT: lxsibzx v4, 0, r3
+; CHECK-BE-P9-NEXT: addi r5, r5, .LCPI0_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v2, 0(r5)
+; CHECK-BE-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lbz r4, 0(r4)
+; CHECK-AIX-64-P8-NEXT: lbz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: ld r5, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-AIX-64-P9-NEXT: lxsibzx v4, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r5)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lbz r4, 0(r4)
+; CHECK-AIX-32-P8-NEXT: lbz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v3, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lwz r5, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxsibzx v4, 0, r3
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r5)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <1 x i8>, ptr %a, align 4
+ %bc1 = bitcast <1 x i8> %0 to i8 ; reinterpret the one-element vector as a scalar byte
+ %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 ; left input: byte from %a in lane 0, remaining lanes poison
+ %1 = load <1 x i8>, ptr %b, align 8
+ %bc2 = bitcast <1 x i8> %1 to i8 ; reinterpret the one-element vector as a scalar byte
+ %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 ; right input: byte from %b in lane 0, remaining lanes undef
+ %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> ; same-type bitcast, leaves the value unchanged
+ %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> ; same-type bitcast, leaves the value unchanged
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0-1 from the left input, lanes 2-9 from lanes 0-7 of the right input, lanes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v16i8_none(<16 x i8> %a, i8 %b) { ; inserts scalar %b into lane 0 of the incoming vector %a; the IR contains no shufflevector
+; CHECK-LE-P8-LABEL: test_v16i8_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P8-NEXT: mtvsrd v4, r5
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtvsrwz v3, r5
+; CHECK-LE-P9-NEXT: vinsertb v2, v3, 15
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r5
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r5
+; CHECK-BE-P9-NEXT: vinsertb v2, v3, 0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT: vinsertb v2, v3, 0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-32-P9-NEXT: vinsertb v2, v3, 0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %vecins = insertelement <16 x i8> %a, i8 %b, i32 0 ; replace lane 0 of %a with %b, all other lanes keep %a's values
+ ret <16 x i8> %vecins
+}
+
+define <16 x i8> @test_none_v16i8(i8 %arg, ptr nocapture noundef readonly %b) { ; v16i8 shuffle of a vector loaded from %b (left) with scalar %arg placed in lane 0 of a v16i8 (right)
+; CHECK-LE-P8-LABEL: test_none_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: mtvsrd v4, r3
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtvsrd v3, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI2_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs = load <16 x i8>, ptr %b, align 4 ; left input: full 16-byte vector read from memory
+ %rhs = insertelement <16 x i8> undef, i8 %arg, i32 0 ; right input: %arg in lane 0, remaining lanes undef
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0-1 from %lhs, lanes 2-9 from lanes 0-7 of %rhs, lanes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v16i8_v8i16(i16 %arg, i8 %arg1) { ; v16i8 shuffle of an i8 placed in lane 0 of a v16i8 (left) with an i16 placed in lane 0 of a v8i16 (right)
+; CHECK-LE-P8-LABEL: test_v16i8_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxswapd v3, vs0
+; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r4, r4, 56
+; CHECK-BE-P8-NEXT: sldi r3, r3, 48
+; CHECK-BE-P8-NEXT: mtvsrd v2, r4
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r4, r4, 56
+; CHECK-BE-P9-NEXT: sldi r3, r3, 48
+; CHECK-BE-P9-NEXT: mtvsrd v2, r4
+; CHECK-BE-P9-NEXT: mtvsrd v3, r3
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32
+; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 ; left input: %arg1 in lane 0, remaining lanes undef
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 ; %arg in halfword lane 0, remaining lanes undef
+ %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> ; right input: the v8i16 reinterpreted as sixteen bytes
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0-1 from %lhs, lanes 2-9 from lanes 0-7 of %rhs, lanes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v8i16_v16i8(i16 %arg, i8 %arg1) { ; v16i8 shuffle of an i16 placed in lane 0 of a v8i16 (left) with an i8 placed in lane 0 of a v16i8 (right)
+; CHECK-LE-P8-LABEL: test_v8i16_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxswapd v3, vs0
+; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r4, r4, 56
+; CHECK-BE-P8-NEXT: sldi r3, r3, 48
+; CHECK-BE-P8-NEXT: mtvsrd v2, r4
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r4, r4, 56
+; CHECK-BE-P9-NEXT: sldi r3, r3, 48
+; CHECK-BE-P9-NEXT: mtvsrd v2, r4
+; CHECK-BE-P9-NEXT: mtvsrd v3, r3
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32
+; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 ; right input: %arg1 in lane 0, remaining lanes undef
+ %lhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 ; %arg in halfword lane 0, remaining lanes undef
+ %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> ; left input: the v8i16 reinterpreted as sixteen bytes
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0-1 from %lhs, lanes 2-9 from lanes 0-7 of %rhs, lanes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_none_v8i16(i16 %arg, ptr nocapture noundef readonly %b) { ; v16i8 shuffle of a vector loaded from %b (left) with scalar %arg placed in lane 0 of a v8i16 (right)
+; CHECK-LE-P8-LABEL: test_none_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: mtvsrd v4, r3
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtvsrd v3, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI5_0@toc@ha
+; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI5_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs = load <16 x i8>, ptr %b, align 4 ; left input: full 16-byte vector read from memory
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 ; %arg in halfword lane 0, remaining lanes undef
+ %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> ; right input: the v8i16 reinterpreted as sixteen bytes
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lanes 0-1 from %lhs, lanes 2-9 from lanes 0-7 of %rhs, lanes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <8 x i16> @test_v8i16_none(<8 x i16> %a, i16 %b) { ; inserts scalar %b into lane 0 of the incoming vector %a; the IR contains no shufflevector
+; CHECK-LE-P8-LABEL: test_v8i16_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-LE-P8-NEXT: mtvsrd v4, r5
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI6_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtvsrwz v3, r5
+; CHECK-LE-P9-NEXT: vinserth v2, v3, 14
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r5
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI6_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r5
+; CHECK-BE-P9-NEXT: vinserth v2, v3, 0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT: vinserth v2, v3, 0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-32-P9-NEXT: vinserth v2, v3, 0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %vecins = insertelement <8 x i16> %a, i16 %b, i32 0 ; replace lane 0 of %a with %b, all other lanes keep %a's values
+ ret <8 x i16> %vecins
+}
+
+define <16 x i8> @test_v16i8_v4i32(i8 %arg, i32 %arg1, <16 x i8> %a, <4 x i32> %b) { ; v16i8 shuffle of %a with %arg in lane 0 (left) and %b with %arg1 in word lane 0 (right); only lane 0 of each input is selected
+; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtvsrws v3, r4
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 56
+; CHECK-BE-P8-NEXT: sldi r4, r4, 32
+; CHECK-BE-P8-NEXT: mtvsrd v2, r3
+; CHECK-BE-P8-NEXT: mtvsrd v3, r4
+; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 56
+; CHECK-BE-P9-NEXT: mtvsrws v3, r4
+; CHECK-BE-P9-NEXT: mtvsrd v2, r3
+; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4
+; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 ; %arg replaces lane 0 of the incoming vector %a (not an undef base)
+ %lhs = bitcast <16 x i8> %lhs.tmp to <16 x i8> ; same-type bitcast, leaves the value unchanged
+ %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 ; %arg1 replaces word lane 0 of the incoming vector %b
+ %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> ; right input: the v4i32 reinterpreted as sixteen bytes
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 from lane 0 of %lhs, lane 1 from lane 0 of %rhs, lanes 2-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v4i32_v16i8(i32 %arg, i8 %arg1) { ; shuffle of an i32-in-<4 x i32> LHS with an i8-in-<16 x i8> RHS
+; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: mtvsrws v3, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r4, r4, 56
+; CHECK-BE-P8-NEXT: sldi r3, r3, 32
+; CHECK-BE-P8-NEXT: mtvsrd v2, r4
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r4, r4, 56
+; CHECK-BE-P9-NEXT: mtvsrws v3, r3
+; CHECK-BE-P9-NEXT: mtvsrd v2, r4
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56
+; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r3
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -32
+; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 ; RHS: only byte lane 0 is defined (%arg1)
+ %lhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 ; LHS: only word lane 0 is defined (%arg)
+ %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> ; view the LHS as 16 bytes so both shuffle inputs are <16 x i8>
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = %lhs bytes 0-1, bytes 2-9 = %rhs bytes 0-7, bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <4 x i32> @test_none_v4i32(<4 x i32> %a, i64 %b) { ; writes trunc(%b) into word lanes 1 and 3 of %a (no explicit shufflevector in the IR)
+; CHECK-LE-P8-LABEL: test_none_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha
+; CHECK-LE-P8-NEXT: mtvsrwz v4, r5
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprwz f0, r5
+; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 8
+; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI9_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r5
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI9_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l
+; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtfprwz f0, r5
+; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 4
+; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 12
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: ld r3, L..C6(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 4
+; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 12
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 4
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %conv = trunc i64 %b to i32 ; keep only the low 32 bits of %b
+ %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 ; overwrite word lane 1 of %a
+ %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3 ; overwrite word lane 3 with the same value; lanes 0 and 2 of %a pass through
+ ret <4 x i32> %vecins2
+}
+
+define <16 x i8> @test_v4i32_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; LHS: i32 loaded from %a in a <4 x i32>; RHS: i8 loaded from %b in a <16 x i8>
+; CHECK-LE-P8-LABEL: test_v4i32_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI10_0@toc@ha
+; CHECK-LE-P8-NEXT: lbzx r4, 0, r4
+; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI10_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-LE-P8-NEXT: vspltb v2, v2, 7
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha
+; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vspltb v3, v3, 7
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lbzx r4, 0, r4
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI10_0@toc@ha
+; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: addi r4, r5, .LCPI10_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-BE-P8-NEXT: vspltb v2, v2, 7
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI10_0@toc@ha
+; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI10_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vspltb v3, v3, 7
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vspltb v3, v3, 7
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lbzx r4, 0, r4
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C5(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vspltb v3, v3, 7
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <4 x i8>, ptr %a, align 4 ; 4 bytes from %a
+ %bc1 = bitcast <4 x i8> %0 to i32 ; treat them as one i32 scalar
+ %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0 ; LHS: only word lane 0 is defined
+ %1 = load <1 x i8>, ptr %b, align 8 ; 1 byte from %b
+ %bc2 = bitcast <1 x i8> %1 to i8 ; treat it as an i8 scalar
+ %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 ; RHS: only byte lane 0 is defined
+ %2 = bitcast <4 x i32> %vecinit3 to <16 x i8> ; view the LHS as 16 bytes
+ %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> ; same-type bitcast (no-op)
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = LHS bytes 0-1, bytes 2-9 = RHS bytes 0-7, bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v16i8_v2i64(i8 %arg, i64 %arg1, <16 x i8> %a, <2 x i64> %b) { ; LHS: i8 inserted into %a; RHS: i64 inserted into %b
+; CHECK-LE-P8-LABEL: test_v16i8_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: xxswapd v3, vs0
+; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 56
+; CHECK-BE-P8-NEXT: mtvsrd v3, r4
+; CHECK-BE-P8-NEXT: mtvsrd v2, r3
+; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 56
+; CHECK-BE-P9-NEXT: mtvsrd v3, r4
+; CHECK-BE-P9-NEXT: mtvsrd v2, r3
+; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 ; LHS: byte lane 0 of %a replaced by %arg
+ %lhs = bitcast <16 x i8> %lhs.tmp to <16 x i8> ; same-type bitcast (no-op)
+ %rhs.tmp = insertelement <2 x i64> %b, i64 %arg1, i32 0 ; RHS: doubleword lane 0 of %b replaced by %arg1
+ %rhs = bitcast <2 x i64> %rhs.tmp to <16 x i8> ; view the RHS as 16 bytes
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result byte 0 = %lhs byte 0, byte 1 = %rhs byte 0, bytes 2-15 undef (the other lanes of %a and %b are never selected)
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v2i64_v16i8(i64 %arg, i8 %arg1) { ; shuffle of an i64-in-<2 x i64> LHS with an i8-in-<16 x i8> RHS
+; CHECK-LE-P8-LABEL: test_v2i64_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxswapd v3, vs0
+; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r4, r4, 56
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: mtvsrd v2, r4
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r4, r4, 56
+; CHECK-BE-P9-NEXT: mtvsrd v3, r3
+; CHECK-BE-P9-NEXT: mtvsrd v2, r4
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 56
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r4
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r4, r4, 56
+; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r4
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: stb r5, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r5, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %rhs = insertelement <16 x i8> undef, i8 %arg1, i32 0 ; RHS: only byte lane 0 is defined (%arg1)
+ %lhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 ; LHS: only doubleword lane 0 is defined (%arg)
+ %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> ; view the LHS as 16 bytes so both shuffle inputs are <16 x i8>
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = %lhs bytes 0-1, bytes 2-9 = %rhs bytes 0-7, bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define dso_local <16 x i8> @test_1_2(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) local_unnamed_addr { ; LHS: i8 loaded from %a in a <16 x i8>; RHS: i16 loaded from %b in an <8 x i16>
+; CHECK-LE-P8-LABEL: test_1_2:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI13_0@toc@ha
+; CHECK-LE-P8-NEXT: lbzx r3, 0, r3
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI13_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-LE-P8-NEXT: vspltb v2, v2, 7
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_1_2:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI13_0@toc@ha
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI13_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vspltb v2, v2, 7
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_1_2:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lbzx r3, 0, r3
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT: vspltb v2, v2, 7
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_1_2:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: vspltb v2, v2, 7
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_1_2:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lbzx r3, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_1_2:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_1_2:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lbzx r3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C6(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_1_2:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <1 x i8>, ptr %a, align 4 ; 1 byte from %a
+ %bc1 = bitcast <1 x i8> %0 to i8 ; treat it as an i8 scalar
+ %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 ; LHS: only byte lane 0 is defined
+ %1 = load <2 x i8>, ptr %b, align 8 ; 2 bytes from %b
+ %bc2 = bitcast <2 x i8> %1 to i16 ; treat them as one i16 scalar
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 ; RHS: only halfword lane 0 is defined
+ %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> ; same-type bitcast (no-op)
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> ; view the RHS as 16 bytes
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = LHS bytes 0-1, bytes 2-9 = RHS bytes 0-7, bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_none_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; LHS: i8 loaded from %a in a <16 x i8>; RHS: scalar loaded from %b (see NOTE on %vecinit6)
+; CHECK-LE-P8-LABEL: test_none_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI14_0@toc@ha
+; CHECK-LE-P8-NEXT: lbzx r3, 0, r3
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI14_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-LE-P8-NEXT: vspltb v2, v2, 7
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI14_0@toc@ha
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI14_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vspltb v2, v2, 7
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lbzx r3, 0, r3
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r3
+; CHECK-BE-P8-NEXT: vspltb v2, v2, 7
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: vspltb v2, v2, 7
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lbzx r3, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lbzx r3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C7(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsibzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <1 x i8>, ptr %a, align 4 ; 1 byte from %a
+ %bc1 = bitcast <1 x i8> %0 to i8 ; treat it as an i8 scalar
+ %vecinit3 = insertelement <16 x i8> poison, i8 %bc1, i64 0 ; LHS: only byte lane 0 is defined
+ %1 = load <2 x i8>, ptr %b, align 8 ; 2 bytes from %b
+ %bc2 = bitcast <2 x i8> %1 to i16 ; treat them as one i16 scalar
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 ; RHS: only halfword lane 0 is defined. NOTE(review): the function name says v2i64 but this builds an <8 x i16> - confirm this is intended
+ %2 = bitcast <16 x i8> %vecinit3 to <16 x i8> ; same-type bitcast (no-op)
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> ; view the RHS as 16 bytes
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = LHS bytes 0-1, bytes 2-9 = RHS bytes 0-7, bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v2i64_none(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; LHS: i64 loaded from %a in a <2 x i64>; RHS: i8 loaded from %b in a <16 x i8>
+; CHECK-LE-P8-LABEL: test_v2i64_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI15_0@toc@ha
+; CHECK-LE-P8-NEXT: lbzx r4, 0, r4
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r3
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI15_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-LE-P8-NEXT: vspltb v2, v2, 7
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI15_0@toc@ha
+; CHECK-LE-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI15_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vspltb v3, v3, 7
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lbzx r4, 0, r4
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r3
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: vspltb v2, v2, 7
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-BE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-BE-P9-NEXT: vspltb v3, v3, 7
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lbzx r4, 0, r4
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vspltb v3, v3, 7
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lbzx r4, 0, r4
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-32-P8-NEXT: xxspltw v3, vs0, 1
+; CHECK-AIX-32-P8-NEXT: vspltb v2, v2, 7
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsibzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxvwsx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: vspltb v3, v3, 7
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <8 x i8>, ptr %a, align 4 ; 8 bytes from %a
+ %bc1 = bitcast <8 x i8> %0 to i64 ; treat them as one i64 scalar
+ %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 ; LHS: only doubleword lane 0 is defined
+ %1 = load <1 x i8>, ptr %b, align 8 ; 1 byte from %b
+ %bc2 = bitcast <1 x i8> %1 to i8 ; treat it as an i8 scalar
+ %vecinit6 = insertelement <16 x i8> undef, i8 %bc2, i64 0 ; RHS: only byte lane 0 is defined
+ %2 = bitcast <2 x i64> %vecinit3 to <16 x i8> ; view the LHS as 16 bytes
+ %3 = bitcast <16 x i8> %vecinit6 to <16 x i8> ; same-type bitcast (no-op)
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = LHS bytes 0-1, bytes 2-9 = RHS bytes 0-7, bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v8i16_v8i16rhs(i16 %arg, i16 %arg1) { ; both shuffle inputs are i16 scalars placed in lane 0 of an <8 x i16>
+; CHECK-LE-P8-LABEL: test_v8i16_v8i16rhs:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: mtvsrd v3, r4
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v8i16rhs:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtvsrd v2, r3
+; CHECK-LE-P9-NEXT: mtvsrd v3, r4
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v8i16rhs:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI16_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r4
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI16_0@toc@l
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-BE-P8-NEXT: vperm v2, v4, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v8i16rhs:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: addis r5, r2, .LCPI16_0@toc@ha
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r4
+; CHECK-BE-P9-NEXT: mtvsrwz v4, r3
+; CHECK-BE-P9-NEXT: addi r5, r5, .LCPI16_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v2, 0(r5)
+; CHECK-BE-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16rhs:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16rhs:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: ld r5, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r5)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16rhs:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16rhs:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: sth r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 ; RHS: only halfword lane 0 is defined (%arg1)
+ %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> ; view the RHS as 16 bytes
+ %lhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 ; LHS: only halfword lane 0 is defined (%arg)
+ %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> ; view the LHS as 16 bytes
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = %lhs bytes 0-1, bytes 2-9 = %rhs bytes 0-7, bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v8i16_v4i32(<8 x i16> %a, <4 x i32> %b, i16 %arg, i32 %arg1) { ; LHS: i16 inserted into %a; RHS: i32 inserted into %b
+; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r7
+; CHECK-LE-P8-NEXT: mtfprd f1, r8
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r7
+; CHECK-LE-P9-NEXT: mtvsrws v3, r8
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r7, 48
+; CHECK-BE-P8-NEXT: sldi r4, r8, 32
+; CHECK-BE-P8-NEXT: mtvsrd v2, r3
+; CHECK-BE-P8-NEXT: mtvsrd v3, r4
+; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r7, 48
+; CHECK-BE-P9-NEXT: mtvsrws v3, r8
+; CHECK-BE-P9-NEXT: mtvsrd v2, r3
+; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4
+; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P9-NEXT: mtvsrws v3, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <8 x i16> %a, i16 %arg, i32 0 ; LHS: halfword lane 0 of %a replaced by %arg
+ %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> ; view the LHS as 16 bytes
+ %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 ; RHS: word lane 0 of %b replaced by %arg1
+ %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> ; view the RHS as 16 bytes
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result byte 0 = %lhs byte 0, byte 1 = %rhs byte 0, bytes 2-15 undef (the other lanes of %a and %b are never selected)
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v8i16_v2i64(<8 x i16> %a, <2 x i64> %b, i16 %arg, i64 %arg1) {
+; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r7
+; CHECK-LE-P8-NEXT: mtfprd f1, r8
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r7
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: mtfprd f0, r8
+; CHECK-LE-P9-NEXT: xxswapd v3, vs0
+; CHECK-LE-P9-NEXT: vmrglb v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r7, 48
+; CHECK-BE-P8-NEXT: mtvsrd v3, r8
+; CHECK-BE-P8-NEXT: mtvsrd v2, r3
+; CHECK-BE-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r7, 48
+; CHECK-BE-P9-NEXT: mtvsrd v3, r8
+; CHECK-BE-P9-NEXT: mtvsrd v2, r3
+; CHECK-BE-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghb v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Byte shuffle whose operands are an i16 scalar insert (lhs) and an i64 scalar insert (rhs).
+ %lhs.tmp = insertelement <8 x i16> %a, i16 %arg, i32 0 ; lhs: %arg replaces i16 lane 0 of %a
+ %lhs = bitcast <8 x i16> %lhs.tmp to <16 x i8> ; view lhs as 16 bytes for the shuffle
+ %rhs.tmp = insertelement <2 x i64> %b, i64 %arg1, i32 0 ; rhs: %arg1 replaces i64 lane 0 of %b
+ %rhs = bitcast <2 x i64> %rhs.tmp to <16 x i8> ; view rhs as 16 bytes for the shuffle
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 16, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; mask: element 0 <- %lhs[0], element 1 <- %rhs[0] (index 16); elements 2-15 undef, so only the inserted scalars are read
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v4i32_v4i32(i32 %arg, i32 %arg1, <4 x i32> %a, <4 x i32> %b) {
+; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprwz f0, r3
+; CHECK-LE-P8-NEXT: mtfprwz f1, r4
+; CHECK-LE-P8-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprwz f0, r3
+; CHECK-LE-P9-NEXT: mtfprwz f1, r4
+; CHECK-LE-P9-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: vmrgow v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrwz v2, r4
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT: vmrgow v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrgow v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v2, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r3
+; CHECK-AIX-64-P9-NEXT: vmrgow v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Byte shuffle whose two operands are both i32 scalar inserts.
+ %lhs.tmp = insertelement <4 x i32> %a, i32 %arg, i32 0 ; lhs: %arg replaces i32 lane 0 of %a
+ %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> ; view lhs as 16 bytes for the shuffle
+ %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 ; rhs: %arg1 replaces i32 lane 0 of %b
+ %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> ; view rhs as 16 bytes for the shuffle
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; mask: elements 0-3 <- %lhs[0..3], elements 4-7 <- %rhs[0..3] (indices 16-19); elements 8-15 undef, i.e. exactly the two inserted i32 lanes
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v4i32_v8i16(i32 %arg, i16 %arg1) {
+; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: mtvsrws v2, r3
+; CHECK-LE-P9-NEXT: xxswapd v3, vs0
+; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 32
+; CHECK-BE-P8-NEXT: sldi r4, r4, 48
+; CHECK-BE-P8-NEXT: mtvsrd v2, r3
+; CHECK-BE-P8-NEXT: mtvsrd v3, r4
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrws v2, r3
+; CHECK-BE-P9-NEXT: sldi r3, r4, 48
+; CHECK-BE-P9-NEXT: mtvsrd v3, r3
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r4
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrws v2, r3
+; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Byte shuffle whose operands are an i32 scalar insert (lhs) and an i16 scalar insert (rhs), both into undef vectors.
+ %lhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 ; lhs: only i32 lane 0 is defined (%arg)
+ %lhs = bitcast <4 x i32> %lhs.tmp to <16 x i8> ; view lhs as 16 bytes for the shuffle
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 ; rhs: only i16 lane 0 is defined (%arg1)
+ %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> ; view rhs as 16 bytes for the shuffle
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; mask: elements 0-1 <- %lhs[0..1], elements 2-9 <- %rhs[0..7] (indices 16-23); elements 10-15 undef. %rhs[2..7] belong to undef i16 lanes 1-3
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
+; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxspltw v2, vs0, 1
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxvwsx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Byte shuffle whose operands are scalar inserts of values loaded from %a and %b.
+ %0 = load <8 x i8>, ptr %a, align 4 ; 8 bytes from %a
+ %bc1 = bitcast <8 x i8> %0 to i64 ; reinterpret the loaded bytes as one i64 scalar
+ %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 ; first operand: only i64 lane 0 is defined
+ %1 = load <2 x i8>, ptr %b, align 8 ; 2 bytes from %b
+ %bc2 = bitcast <2 x i8> %1 to i16 ; reinterpret the loaded bytes as one i16 scalar
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 ; second operand: only i16 lane 0 is defined. NOTE(review): the function name says v2i64 for both operands, but this one is built as <8 x i16> - confirm that is intended
+ %2 = bitcast <2 x i64> %vecinit3 to <16 x i8> ; view first operand as 16 bytes
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> ; view second operand as 16 bytes
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; mask: elements 0-1 <- %2[0..1], elements 2-9 <- %3[0..7] (indices 16-23); elements 10-15 undef. %3[2..7] belong to undef i16 lanes 1-3
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v2i64_v4i32(i64 %arg, i32 %arg1, <2 x i64> %a, <4 x i32> %b) {
+; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrglw v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtvsrws vs1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxmrglw v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: sldi r3, r4, 32
+; CHECK-BE-P8-NEXT: mtfprd f1, r3
+; CHECK-BE-P8-NEXT: xxmrghw v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrws vs1, r4
+; CHECK-BE-P9-NEXT: mtfprd f0, r3
+; CHECK-BE-P9-NEXT: xxmrghw v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 32
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghw v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Byte shuffle whose operands are an i64 scalar insert (lhs) and an i32 scalar insert (rhs).
+ %lhs.tmp = insertelement <2 x i64> %a, i64 %arg, i32 0 ; lhs: %arg replaces i64 lane 0 of %a
+ %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> ; view lhs as 16 bytes for the shuffle
+ %rhs.tmp = insertelement <4 x i32> %b, i32 %arg1, i32 0 ; rhs: %arg1 replaces i32 lane 0 of %b
+ %rhs = bitcast <4 x i32> %rhs.tmp to <16 x i8> ; view rhs as 16 bytes for the shuffle
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; mask: elements 0-3 <- %lhs[0..3] (part of the inserted i64), elements 4-7 <- %rhs[0..3] (indices 16-19, the inserted i32); elements 8-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v2i64_v8i16(i64 %arg, i16 %arg1) {
+; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: xxswapd v3, vs0
+; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtvsrd v2, r3
+; CHECK-BE-P8-NEXT: sldi r3, r4, 48
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrd v2, r3
+; CHECK-BE-P9-NEXT: sldi r3, r4, 48
+; CHECK-BE-P9-NEXT: mtvsrd v3, r3
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrd v2, r3
+; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P9-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: sth r5, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r5, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Byte shuffle whose operands are an i64 scalar insert (lhs) and an i16 scalar insert (rhs), both into undef vectors.
+ %lhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 ; lhs: only i64 lane 0 is defined (%arg)
+ %lhs = bitcast <2 x i64> %lhs.tmp to <16 x i8> ; view lhs as 16 bytes for the shuffle
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 ; rhs: only i16 lane 0 is defined (%arg1)
+ %rhs = bitcast <8 x i16> %rhs.tmp to <16 x i8> ; view rhs as 16 bytes for the shuffle
+ %shuffle = shufflevector <16 x i8> %lhs, <16 x i8> %rhs, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; mask: elements 0-1 <- %lhs[0..1], elements 2-9 <- %rhs[0..7] (indices 16-23); elements 10-15 undef. %rhs[2..7] belong to undef i16 lanes 1-3
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v4i32_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) {
+; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI24_0@toc@ha
+; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P8-NEXT: lfdx f1, 0, r4
+; CHECK-LE-P8-NEXT: addi r3, r5, .LCPI24_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs2, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v2, f0
+; CHECK-LE-P8-NEXT: xxswapd v3, f1
+; CHECK-LE-P8-NEXT: xxswapd v4, vs2
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l
+; CHECK-LE-P9-NEXT: xxswapd v2, f0
+; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: xxswapd v3, f0
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI24_0@toc@ha
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-BE-P8-NEXT: addi r3, r5, .LCPI24_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI24_0@toc@ha
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI24_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C9(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r4)
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw v3, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxmrghw v3, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Byte shuffle whose operands are scalar inserts of a loaded i32 (first) and a loaded i64 (second).
+ %0 = load <4 x i8>, ptr %a, align 4 ; 4 bytes from %a
+ %bc1 = bitcast <4 x i8> %0 to i32 ; reinterpret the loaded bytes as one i32 scalar
+ %vecinit3 = insertelement <4 x i32> poison, i32 %bc1, i64 0 ; first operand: only i32 lane 0 is defined
+ %1 = load <8 x i8>, ptr %b, align 8 ; 8 bytes from %b
+ %bc2 = bitcast <8 x i8> %1 to i64 ; reinterpret the loaded bytes as one i64 scalar
+ %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0 ; second operand: only i64 lane 0 is defined
+ %2 = bitcast <4 x i32> %vecinit3 to <16 x i8> ; view first operand as 16 bytes
+ %3 = bitcast <2 x i64> %vecinit6 to <16 x i8> ; view second operand as 16 bytes
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; mask: elements 0-1 <- %2[0..1], elements 2-9 <- %3[0..7] (indices 16-23, the whole inserted i64); elements 10-15 undef
+ ret <16 x i8> %shuffle
+}
diff --git a/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
new file mode 100644
index 0000000000000..4623d34c27ac0
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/v2i64_scalar_to_vector_shuffle.ll
@@ -0,0 +1,1909 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+define <2 x i64> @test_v16i8_v16i8(i8 %arg1, i8 %arg) {
+; CHECK-LE-P8-LABEL: test_v16i8_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: stb r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stb r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Doubleword shuffle whose two operands are both i8 scalar inserts into undef vectors.
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 ; lhs: only i8 lane 0 is defined (%arg1)
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> ; view lhs as two i64 lanes for the shuffle
+ %rhs.tmp = insertelement <16 x i8> undef, i8 %arg, i32 0 ; rhs: only i8 lane 0 is defined (%arg)
+ %rhs = bitcast <16 x i8> %rhs.tmp to <2 x i64> ; view rhs as two i64 lanes for the shuffle
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask: element 0 <- %lhs[0], element 1 <- %rhs[0] (index 2)
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_none_v16i8(i8 %arg1, ptr nocapture noundef readonly %b) {
+; CHECK-LE-P8-LABEL: test_none_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Doubleword shuffle where the first operand is a plain vector load and the second is an i8 scalar insert.
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 ; scalar side: only i8 lane 0 is defined (%arg1)
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> ; view the scalar insert as two i64 lanes
+ %rhs = load <2 x i64>, ptr %b, align 4 ; full vector loaded from %b (no scalar insert on this side)
+ %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> <i32 0, i32 2> ; operands are passed as (%rhs, %lhs): element 0 <- loaded vector[0], element 1 <- scalar insert[0] (index 2)
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v16i8_none(i8 %arg1, ptr nocapture noundef readonly %b) {
+; CHECK-LE-P8-LABEL: test_v16i8_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry: ; Doubleword shuffle where the first operand is an i8 scalar insert and the second is a plain vector load.
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0 ; scalar side: only i8 lane 0 is defined (%arg1)
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64> ; view the scalar insert as two i64 lanes
+ %rhs = load <2 x i64>, ptr %b, align 4 ; full vector loaded from %b (no scalar insert on this side)
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask: element 0 <- scalar insert[0], element 1 <- loaded vector[0] (index 2)
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle whose two inputs are both built from scalars: %arg1 (i8) is inserted
+; into element 0 of an undef <16 x i8>, and %arg (i16) into element 0 of an
+; undef <8 x i16>; each is then bitcast to <2 x i64>.
+; The result is { element 0 of the i8-derived vector, element 0 of the
+; i16-derived vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v16i8_v8i16(i8 %arg1, i16 %arg) {
+; CHECK-LE-P8-LABEL: test_v16i8_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 56
+; CHECK-BE-P8-NEXT: sldi r4, r4, 48
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 56
+; CHECK-BE-P9-NEXT: mtfprd f0, r3
+; CHECK-BE-P9-NEXT: sldi r3, r4, 48
+; CHECK-BE-P9-NEXT: mtfprd f1, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64>
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0
+ %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64>
+; Mask <0, 2>: index 0 is element 0 of %lhs, index 2 is element 0 of %rhs.
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Same inputs as a v16i8/v8i16 scalar-insert pair, but with the shuffle
+; operands swapped: %arg (i16) goes into element 0 of an undef <8 x i16>,
+; %arg1 (i8) into element 0 of an undef <16 x i8>; both are bitcast to
+; <2 x i64>.
+; The result is { element 0 of the i16-derived vector, element 0 of the
+; i8-derived vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v8i16_v16i8(i8 %arg1, i16 %arg) {
+; CHECK-LE-P8-LABEL: test_v8i16_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 56
+; CHECK-BE-P8-NEXT: sldi r4, r4, 48
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 56
+; CHECK-BE-P9-NEXT: mtfprd f0, r3
+; CHECK-BE-P9-NEXT: sldi r3, r4, 48
+; CHECK-BE-P9-NEXT: mtfprd f1, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64>
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0
+ %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64>
+; %rhs is the FIRST shuffle operand here: mask <0, 2> picks element 0 of %rhs,
+; then element 0 of %lhs.
+ %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle with a scalar-built first input and a loaded second input: %arg1
+; (i16) is inserted into element 0 of an undef <8 x i16> and bitcast to
+; <2 x i64>; the other input is a <2 x i64> loaded from %b with align 4.
+; The result is { element 0 of the i16-derived vector, element 0 of the
+; loaded vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v8i16_none(i16 %arg1, ptr nocapture noundef readonly %b) {
+; CHECK-LE-P8-LABEL: test_v8i16_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0
+ %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64>
+ %rhs = load <2 x i64>, ptr %b, align 4
+; Mask <0, 2>: index 0 is element 0 of %lhs, index 2 is element 0 of %rhs.
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle with a loaded first input and a scalar-built second input: a
+; <2 x i64> is loaded from %b with align 4; %arg1 (i16) is inserted into
+; element 0 of an undef <8 x i16> and bitcast to <2 x i64>.
+; The result is { element 0 of the loaded vector, element 0 of the
+; i16-derived vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_none_v8i16(i16 %arg1, ptr nocapture noundef readonly %b) {
+; CHECK-LE-P8-LABEL: test_none_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0
+ %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64>
+ %rhs = load <2 x i64>, ptr %b, align 4
+; %rhs (the load) is the FIRST shuffle operand here: mask <0, 2> picks
+; element 0 of %rhs, then element 0 of %lhs.
+ %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle whose two inputs are both built from scalars: %arg1 (i8) is inserted
+; into element 0 of an undef <16 x i8>, and %arg (i32) into element 0 of an
+; undef <4 x i32>; each is then bitcast to <2 x i64>.
+; The result is { element 0 of the i8-derived vector, element 0 of the
+; i32-derived vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v16i8_v4i32(i8 %arg1, i32 %arg) {
+; CHECK-LE-P8-LABEL: test_v16i8_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtvsrws vs1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 56
+; CHECK-BE-P8-NEXT: sldi r4, r4, 32
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 56
+; CHECK-BE-P9-NEXT: mtvsrws vs1, r4
+; CHECK-BE-P9-NEXT: mtfprd f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64>
+ %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0
+ %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64>
+; Mask <0, 2>: index 0 is element 0 of %lhs, index 2 is element 0 of %rhs.
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Same inputs as a v16i8/v4i32 scalar-insert pair, but with the shuffle
+; operands swapped: %arg (i32) goes into element 0 of an undef <4 x i32>,
+; %arg1 (i8) into element 0 of an undef <16 x i8>; both are bitcast to
+; <2 x i64>.
+; The result is { element 0 of the i32-derived vector, element 0 of the
+; i8-derived vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v4i32_v16i8(i8 %arg1, i32 %arg) {
+; CHECK-LE-P8-LABEL: test_v4i32_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtvsrws vs1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, vs1
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 56
+; CHECK-BE-P8-NEXT: sldi r4, r4, 32
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 56
+; CHECK-BE-P9-NEXT: mtvsrws vs1, r4
+; CHECK-BE-P9-NEXT: mtfprd f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64>
+ %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0
+ %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64>
+; %rhs is the FIRST shuffle operand here: mask <0, 2> picks element 0 of %rhs,
+; then element 0 of %lhs.
+ %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle with a loaded first input and a scalar-built second input: a
+; <2 x i64> is loaded from %b with align 4; %arg1 (i32) is inserted into
+; element 0 of an undef <4 x i32> and bitcast to <2 x i64>.
+; The result is { element 0 of the loaded vector, element 0 of the
+; i32-derived vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_none_v4i32(i32 %arg1, ptr nocapture noundef readonly %b) {
+; CHECK-LE-P8-LABEL: test_none_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: mtfprwz f0, r3
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT: mtfprwz f0, r3
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0
+ %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64>
+ %rhs = load <2 x i64>, ptr %b, align 4
+; %rhs (the load) is the FIRST shuffle operand here: mask <0, 2> picks
+; element 0 of %rhs, then element 0 of %lhs.
+ %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle with a scalar-built first input and a loaded second input: %arg1
+; (i32) is inserted into element 0 of an undef <4 x i32> and bitcast to
+; <2 x i64>; the other input is a <2 x i64> loaded from %b with align 4.
+; The result is { element 0 of the i32-derived vector, element 0 of the
+; loaded vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v4i32_none(i32 %arg1, ptr nocapture noundef readonly %b) {
+; CHECK-LE-P8-LABEL: test_v4i32_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: mtfprwz f0, r3
+; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-LE-P9-NEXT: mtfprwz f0, r3
+; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxv v2, 0(r4)
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0
+ %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64>
+ %rhs = load <2 x i64>, ptr %b, align 4
+; Mask <0, 2>: index 0 is element 0 of %lhs, index 2 is element 0 of %rhs.
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle whose two inputs are both built from scalars: %arg1 (i8) is inserted
+; into element 0 of an undef <16 x i8> and bitcast to <2 x i64>; %arg (i64) is
+; inserted into element 0 of an undef <2 x i64>.
+; The result is { element 0 of the i8-derived vector, element 0 of the
+; i64-derived vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v16i8_v2i64(i8 %arg1, i64 %arg) {
+; CHECK-LE-P8-LABEL: test_v16i8_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v16i8_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxswapd v2, vs1
+; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v16i8_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 56
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v16i8_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 56
+; CHECK-BE-P9-NEXT: mtfprd f1, r4
+; CHECK-BE-P9-NEXT: mtfprd f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v16i8_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v16i8_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 56
+; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v16i8_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C1(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v16i8_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8
+; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64>
+ %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0
+; The bitcast below is to the identical type, so it leaves the value unchanged.
+ %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64>
+; Mask <0, 2>: index 0 is element 0 of %lhs, index 2 is element 0 of %rhs.
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Same inputs as a v16i8/v2i64 scalar-insert pair, but with the shuffle
+; operands swapped: %arg (i64) goes into element 0 of an undef <2 x i64>;
+; %arg1 (i8) goes into element 0 of an undef <16 x i8> and is bitcast to
+; <2 x i64>.
+; The result is { element 0 of the i64-derived vector, element 0 of the
+; i8-derived vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v2i64_v16i8(i8 %arg1, i64 %arg) {
+; CHECK-LE-P8-LABEL: test_v2i64_v16i8:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v16i8:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxswapd v2, vs1
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v16i8:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtfprd f0, r4
+; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v16i8:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtvsrdd v2, r4, r4
+; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v16i8:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v16i8:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r4, r4
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v16i8:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16
+; CHECK-AIX-32-P8-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6
+; CHECK-AIX-32-P8-NEXT: stw r5, -32(r1)
+; CHECK-AIX-32-P8-NEXT: stw r4, -48(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v16i8:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r5, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -48(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs2, -48(r1)
+; CHECK-AIX-32-P9-NEXT: stb r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <16 x i8> undef, i8 %arg1, i32 0
+ %lhs = bitcast <16 x i8> %lhs.tmp to <2 x i64>
+ %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0
+; The bitcast below is to the identical type, so it leaves the value unchanged.
+ %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64>
+; %rhs is the FIRST shuffle operand here: mask <0, 2> picks element 0 of %rhs,
+; then element 0 of %lhs.
+ %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle with a loaded first input and a scalar-built second input: a
+; <2 x i64> is loaded from %b with align 4; %arg (i64) is inserted into
+; element 0 of an undef <2 x i64>.
+; The result is { element 0 of the loaded vector, %arg }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_none_v2i64(ptr nocapture noundef readonly %b, i64 %arg) {
+; CHECK-LE-P8-LABEL: test_none_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: mtfprd f0, r4
+; CHECK-LE-P8-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: xxpermdi v2, vs0, v2, 1
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r3
+; CHECK-BE-P8-NEXT: mtfprd f0, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: mtfprd f0, r4
+; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r4
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r6, L..C2(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r5, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v4, v2
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8
+; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs = load <2 x i64>, ptr %b, align 4
+ %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0
+; Mask <0, 2>: index 0 is element 0 of %lhs (the load), index 2 is element 0
+; of %rhs (the inserted scalar).
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+; Shuffle with a scalar-built first input and a loaded second input: %arg
+; (i64) is inserted into element 0 of an undef <2 x i64>; the other input is a
+; <2 x i64> loaded from %b with align 4.
+; The result is { %arg, element 0 of the loaded vector }.
+; The assertions below list the expected assembly per FileCheck prefix; the
+; prefix-to-target mapping is defined by this file's RUN lines.
+define <2 x i64> @test_v2i64_none(ptr nocapture noundef readonly %b, i64 %arg) {
+; CHECK-LE-P8-LABEL: test_v2i64_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: mtfprd f0, r4
+; CHECK-LE-P8-NEXT: xxpermdi v2, v2, vs0, 2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: xxpermdi v2, v2, vs0, 2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtfprd f0, r4
+; CHECK-BE-P8-NEXT: lxvd2x v2, 0, r3
+; CHECK-BE-P8-NEXT: xxspltd v3, vs0, 0
+; CHECK-BE-P8-NEXT: xxmrghd v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: mtvsrdd v3, r4, r4
+; CHECK-BE-P9-NEXT: xxmrghd v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r4
+; CHECK-AIX-64-P8-NEXT: lxvd2x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v3, vs0, vs0
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: mtvsrdd v3, r4, r4
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lxvd2x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs = load <2 x i64>, ptr %b, align 4
+ %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0
+; %rhs (the inserted scalar) is the FIRST shuffle operand here: mask <0, 2>
+; picks element 0 of %rhs, then element 0 of %lhs (the load).
+ %shuffle = shufflevector <2 x i64> %rhs, <2 x i64> %lhs, <2 x i32> <i32 0, i32 2>
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v8i16_v8i16(i16 %arg1, i16 %arg) { ; v2i64 shuffle whose two inputs are each an i16 inserted into a v8i16
+; CHECK-LE-P8-LABEL: test_v8i16_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 ; lhs: i16 scalar placed in lane 0, remaining lanes undef
+ %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> ; reinterpret as the v2i64 type the shuffle operates on
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 ; rhs: i16 scalar placed in lane 0, remaining lanes undef
+ %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> ; reinterpret as v2i64, same as lhs
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v8i16_v4i32(i16 %arg1, i32 %arg) { ; v2i64 shuffle: lhs from an i16 in a v8i16, rhs from an i32 in a v4i32
+; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtvsrws vs1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 48
+; CHECK-BE-P8-NEXT: sldi r4, r4, 32
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 48
+; CHECK-BE-P9-NEXT: mtvsrws vs1, r4
+; CHECK-BE-P9-NEXT: mtfprd f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 32
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P9-NEXT: mtvsrws vs1, r4
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 ; lhs: i16 scalar placed in lane 0, remaining lanes undef
+ %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> ; reinterpret as the v2i64 type the shuffle operates on
+ %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 ; rhs: i32 scalar placed in lane 0, remaining lanes undef
+ %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> ; reinterpret as v2i64 to match lhs
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v8i16_v2i64(i16 %arg1, i64 %arg) { ; v2i64 shuffle: lhs from an i16 in a v8i16, rhs from an i64 in a v2i64
+; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxswapd v2, vs1
+; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 48
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: sldi r3, r3, 48
+; CHECK-BE-P9-NEXT: mtfprd f1, r4
+; CHECK-BE-P9-NEXT: mtfprd f0, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P9-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: addi r6, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r6
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C5(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: sth r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 8
+; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r5
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 12
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <8 x i16> undef, i16 %arg1, i32 0 ; lhs: i16 scalar placed in lane 0, remaining lanes undef
+ %lhs = bitcast <8 x i16> %lhs.tmp to <2 x i64> ; reinterpret as the v2i64 type the shuffle operates on
+ %rhs.tmp = insertelement <2 x i64> undef, i64 %arg, i32 0 ; rhs: i64 scalar placed in lane 0, lane 1 undef
+ %rhs = bitcast <2 x i64> %rhs.tmp to <2 x i64> ; same-type bitcast (no-op); presumably kept to mirror the sibling tests
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v4i32_v4i32(i32 %arg1, i32 %arg) { ; v2i64 shuffle whose two inputs are each an i32 inserted into a v4i32
+; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprwz f0, r3
+; CHECK-LE-P8-NEXT: mtfprwz f1, r4
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprwz f0, r3
+; CHECK-LE-P9-NEXT: mtfprwz f1, r4
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtfprwz f0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtfprwz f0, r3
+; CHECK-BE-P9-NEXT: mtfprwz f1, r4
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 ; lhs: i32 scalar placed in lane 0, remaining lanes undef
+ %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> ; reinterpret as the v2i64 type the shuffle operates on
+ %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 ; rhs: i32 scalar placed in lane 0, remaining lanes undef
+ %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> ; reinterpret as v2i64, same as lhs
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v4i32_v8i16(i32 %arg1, i16 %arg) { ; v2i64 shuffle: lhs from an i32 in a v4i32, rhs from an i16 in a v8i16
+; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: mtvsrws vs0, r3
+; CHECK-LE-P9-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 32
+; CHECK-BE-P8-NEXT: sldi r4, r4, 48
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrws vs0, r3
+; CHECK-BE-P9-NEXT: sldi r3, r4, 48
+; CHECK-BE-P9-NEXT: mtfprd f1, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32
+; CHECK-AIX-64-P8-NEXT: sldi r4, r4, 48
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrws vs0, r3
+; CHECK-AIX-64-P9-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P9-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 ; lhs: i32 scalar placed in lane 0, remaining lanes undef
+ %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> ; reinterpret as the v2i64 type the shuffle operates on
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 ; rhs: i16 scalar placed in lane 0, remaining lanes undef
+ %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> ; reinterpret as v2i64 to match lhs
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v4i32_v2i64(i32 %arg1, i64 %arg) { ; v2i64 shuffle: lhs from an i32 in a v4i32, rhs from an i64 in a v2i64
+; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f1, r4
+; CHECK-LE-P9-NEXT: mtvsrws vs0, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, vs1
+; CHECK-LE-P9-NEXT: xxmrgld v2, v2, vs0
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: sldi r3, r3, 32
+; CHECK-BE-P8-NEXT: mtfprd f1, r4
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtvsrws vs0, r3
+; CHECK-BE-P9-NEXT: mtfprd f1, r4
+; CHECK-BE-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 32
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtvsrws vs0, r3
+; CHECK-AIX-64-P9-NEXT: mtfprd f1, r4
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, -48(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -48
+; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r3, -48(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -48(r1)
+; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <4 x i32> undef, i32 %arg1, i32 0 ; lhs: i32 scalar placed in lane 0, remaining lanes undef
+ %lhs = bitcast <4 x i32> %lhs.tmp to <2 x i64> ; reinterpret as the v2i64 type the shuffle operates on
+ %rhs = insertelement <2 x i64> undef, i64 %arg, i32 0 ; rhs: i64 scalar placed in lane 0, lane 1 undef; already v2i64, so no bitcast
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v2i64_v2i64(ptr nocapture noundef readonly %a, ptr nocapture noundef readonly %b) { ; v2i64 shuffle of two loaded i64 scalars, followed by an add that reuses the lhs vector
+; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: ld r3, 0(r3)
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: xxmrghd v3, vs0, vs1
+; CHECK-LE-P8-NEXT: vaddudm v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: ld r3, 0(r3)
+; CHECK-LE-P9-NEXT: lfd f1, 0(r4)
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: xxmrghd v3, vs1, vs0
+; CHECK-LE-P9-NEXT: vaddudm v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT: lfdx f0, 0, r4
+; CHECK-BE-P8-NEXT: xxmrghd v3, v2, vs0
+; CHECK-BE-P8-NEXT: vaddudm v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-BE-P9-NEXT: lfd f0, 0(r4)
+; CHECK-BE-P9-NEXT: xxmrghd v3, v2, vs0
+; CHECK-BE-P9-NEXT: vaddudm v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v3, v2, vs0
+; CHECK-AIX-64-P8-NEXT: vaddudm v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r4)
+; CHECK-AIX-64-P9-NEXT: xxmrghd v3, v2, vs0
+; CHECK-AIX-64-P9-NEXT: vaddudm v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r3)
+; CHECK-AIX-32-P8-NEXT: addi r6, r1, -32
+; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 4(r4)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r6
+; CHECK-AIX-32-P8-NEXT: stw r3, -48(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -64
+; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: stw r3, -64(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -48
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs3, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs3, vs2
+; CHECK-AIX-32-P8-NEXT: xxmrghd v3, v2, vs0
+; CHECK-AIX-32-P8-NEXT: vaddudm v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lwz r5, 4(r3)
+; CHECK-AIX-32-P9-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 4(r4)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -48(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -64(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -64(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: xxmrghd v3, v2, vs0
+; CHECK-AIX-32-P9-NEXT: vaddudm v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <8 x i8>, ptr %a, align 8 ; 8 bytes loaded from %a
+ %bc1 = bitcast <8 x i8> %0 to i64 ; view those 8 bytes as one i64 scalar
+ %vecinit3 = insertelement <2 x i64> poison, i64 %bc1, i64 0 ; lhs: scalar placed in lane 0, lane 1 poison
+ %1 = load <8 x i8>, ptr %b, align 8 ; 8 bytes loaded from %b
+ %bc2 = bitcast <8 x i8> %1 to i64 ; view those 8 bytes as one i64 scalar
+ %vecinit6 = insertelement <2 x i64> undef, i64 %bc2, i64 0 ; rhs: scalar placed in lane 0, lane 1 undef
+ %2 = bitcast <2 x i64> %vecinit3 to <2 x i64> ; same-type bitcast (no-op)
+ %3 = bitcast <2 x i64> %vecinit6 to <2 x i64> ; same-type bitcast (no-op)
+ %shuffle = shufflevector <2 x i64> %2, <2 x i64> %3, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ %4 = add <2 x i64> %shuffle, %2 ; second use of the lhs vector, besides the shuffle
+ ret <2 x i64> %4
+}
+
+define <2 x i64> @test_v2i64_v4i32(i64 %arg1, i32 %arg) { ; v2i64 shuffle: lhs from an i64 in a v2i64, rhs from an i32 in a v4i32
+; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: mtvsrws vs0, r4
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0
+; CHECK-BE-P8-NEXT: mtfprwz f0, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtfprwz f0, r4
+; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r6, r1, -48
+; CHECK-AIX-32-P8-NEXT: stw r5, -48(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6
+; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r5, -48(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <2 x i64> undef, i64 %arg1, i32 0 ; lhs: i64 scalar placed in lane 0, lane 1 undef
+ %lhs = bitcast <2 x i64> %lhs.tmp to <2 x i64> ; same-type bitcast (no-op); presumably kept to mirror the sibling tests
+ %rhs.tmp = insertelement <4 x i32> undef, i32 %arg, i32 0 ; rhs: i32 scalar placed in lane 0, remaining lanes undef
+ %rhs = bitcast <4 x i32> %rhs.tmp to <2 x i64> ; reinterpret as v2i64 to match lhs
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ ret <2 x i64> %shuffle
+}
+
+define <2 x i64> @test_v2i64_v8i16(i64 %arg1, i16 %arg) { ; v2i64 shuffle: lhs from an i64 in a v2i64, rhs from an i16 in a v8i16
+; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: mtfprd f0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs1
+; CHECK-LE-P8-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: mtfprd f0, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, vs0
+; CHECK-LE-P9-NEXT: mtfprd f0, r4
+; CHECK-LE-P9-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P9-NEXT: xxmrgld v2, vs0, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: mtfprd f0, r3
+; CHECK-BE-P8-NEXT: xxspltd v2, vs0, 0
+; CHECK-BE-P8-NEXT: mtfprwz f0, r4
+; CHECK-BE-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: mtfprwz f0, r4
+; CHECK-BE-P9-NEXT: mtvsrdd v2, r3, r3
+; CHECK-BE-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: mtfprd f0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, vs0, vs0
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-64-P9-NEXT: mtvsrdd v2, r3, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghd v2, v2, vs0
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r6, r1, -48
+; CHECK-AIX-32-P8-NEXT: sth r5, -48(r1)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r6
+; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs2, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs1, vs2, vs1
+; CHECK-AIX-32-P8-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: sth r5, -48(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -48(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs1, vs2, vs1
+; CHECK-AIX-32-P9-NEXT: xxmrghd v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %lhs.tmp = insertelement <2 x i64> undef, i64 %arg1, i32 0 ; lhs: i64 scalar placed in lane 0, lane 1 undef
+ %lhs = bitcast <2 x i64> %lhs.tmp to <2 x i64> ; same-type bitcast (no-op); presumably kept to mirror the sibling tests
+ %rhs.tmp = insertelement <8 x i16> undef, i16 %arg, i32 0 ; rhs: i16 scalar placed in lane 0, remaining lanes undef
+ %rhs = bitcast <8 x i16> %rhs.tmp to <2 x i64> ; reinterpret as v2i64 to match lhs
+ %shuffle = shufflevector <2 x i64> %lhs, <2 x i64> %rhs, <2 x i32> <i32 0, i32 2> ; mask <0, 2>: result = { lhs[0], rhs[0] }
+ ret <2 x i64> %shuffle
+}
+
diff --git a/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
new file mode 100644
index 0000000000000..a8ca0f69f2cc8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/v4i32_scalar_to_vector_shuffle.ll
@@ -0,0 +1,1445 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+define void @test_none_v8i16(ptr %a) { ; shuffles a <2 x i32> loaded from %a with an i16 placed in lane 0 of an undef <8 x i16>
+; CHECK-LE-P8-LABEL: test_none_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-LE-P8-NEXT: mtvsrd v4, r4
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stfdx f0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P9-NEXT: xxswapd vs0, v2
+; CHECK-LE-P9-NEXT: stfd f0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-BE-P8-NEXT: sldi r3, r4, 48
+; CHECK-BE-P8-NEXT: mtfprd f1, r3
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT: stfdx f0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2
+; CHECK-BE-P9-NEXT: stfd f0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: stfdx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2
+; CHECK-AIX-64-P9-NEXT: stfd f0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lwz r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: stxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr undef, align 1 ; two bytes read through an undef pointer
+ %tmp0_1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16 scalar
+ %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 ; scalar lands in lane 0, the other seven lanes stay undef
+ %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> ; same bits viewed as <4 x i32> so it can feed the shuffle
+ %1 = load <2 x i32>, ptr %a ; the ordinary (non-inserted) vector input
+ %tmp1_1 = shufflevector <2 x i32> %1, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1> ; widen to four lanes by repeating elements 0 and 1
+ %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_1, <2 x i32> <i32 4, i32 0> ; result = { element 0 of %tmp1_1, element 0 of %tmp0_3 }
+ store <2 x i32> %2, ptr undef, align 4 ; destination pointer is undef
+ ret void
+}
+
+define void @test_v8i16_none(ptr %a) { ; interleaves an i16 placed in lane 0 of an undef <8 x i16> with a <4 x i32> loaded from %a
+; CHECK-LE-P8-LABEL: test_v8i16_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: mtfprwz f1, r4
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-BE-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: mtfprwz f1, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lxv vs1, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr undef, align 1 ; two bytes read through an undef pointer
+ %tmp0_1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16 scalar
+ %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 ; scalar lands in lane 0, the other seven lanes stay undef
+ %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> ; same bits viewed as <4 x i32> so it can feed the shuffle
+ %1 = load <4 x i32>, ptr %a, align 1 ; the ordinary vector input, loaded with byte alignment
+ %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %1, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave elements 0 and 1 of both operands, first operand leading
+ store <4 x i32> %2, ptr undef, align 4 ; destination pointer is undef
+ ret void
+}
+
+define void @test_none_v4i32(<2 x i32> %vec, ptr %ptr1) { ; <8 x i16> shuffle: six lanes of doubled %vec followed by two i16 lanes taken from %vec's element 0
+; CHECK-LE-P8-LABEL: test_none_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r3
+; CHECK-LE-P8-NEXT: mffprwz r3, f0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: mtvsrwz v4, r3
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: li r3, 0
+; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2
+; CHECK-LE-P9-NEXT: mtvsrwz v3, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: stxv v2, 0(r5)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT: mffprwz r4, f0
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r4
+; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r5
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: li r3, 0
+; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: stxv v2, 0(r5)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mffprwz r5, f0
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: li r4, 0
+; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4
+; CHECK-AIX-64-P9-NEXT: ld r4, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r4)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16
+; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lwz r4, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r4)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = extractelement <2 x i32> %vec, i64 0 ; pull element 0 of the argument out as an i32 scalar
+ %1 = bitcast i32 %0 to <2 x i16> ; view that scalar as two i16 lanes
+ %2 = shufflevector <2 x i16> %1, <2 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ; both operands are %1, so the i16 pair is repeated across all eight lanes
+ %3 = shufflevector <2 x i32> %vec, <2 x i32> %vec, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenate %vec with itself into a <4 x i32>
+ %4 = bitcast <4 x i32> %3 to <8 x i16> ; same bits viewed as eight i16 lanes
+ %5 = shufflevector <8 x i16> %4, <8 x i16> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 8, i32 9> ; lanes 0-5 from %4, lanes 6-7 are elements 0 and 1 of %2
+ store <8 x i16> %5, ptr %ptr1, align 16
+ ret void
+}
+
+define void @test_v4i32_none(<2 x i32> %vec, ptr %ptr1) { ; <8 x i16> shuffle: two i16 lanes taken from %vec's element 0 followed by six lanes of doubled %vec
+; CHECK-LE-P8-LABEL: test_v4i32_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r3
+; CHECK-LE-P8-NEXT: mffprwz r3, f0
+; CHECK-LE-P8-NEXT: xxswapd v3, vs1
+; CHECK-LE-P8-NEXT: mtvsrwz v4, r3
+; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: li r3, 0
+; CHECK-LE-P9-NEXT: vextuwrx r3, r3, v2
+; CHECK-LE-P9-NEXT: mtvsrwz v3, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P9-NEXT: stxv v2, 0(r5)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-BE-P8-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-BE-P8-NEXT: addi r3, r3, .LCPI3_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-BE-P8-NEXT: mffprwz r4, f0
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r4
+; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r5
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: li r3, 0
+; CHECK-BE-P9-NEXT: vextuwlx r3, r3, v2
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT: stxv v2, 0(r5)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, v2, v2, 3
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mffprwz r5, f0
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: li r4, 0
+; CHECK-AIX-64-P9-NEXT: vextuwlx r4, r4, v2
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r4
+; CHECK-AIX-64-P9-NEXT: ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r4)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: addi r5, r1, -16
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r5
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: addi r4, r1, -16
+; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r4)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = extractelement <2 x i32> %vec, i64 0 ; pull element 0 of the argument out as an i32 scalar
+ %1 = bitcast i32 %0 to <2 x i16> ; view that scalar as two i16 lanes
+ %2 = shufflevector <2 x i16> %1, <2 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3> ; both operands are %1, so the i16 pair is repeated across all eight lanes
+ %3 = shufflevector <2 x i32> %vec, <2 x i32> %vec, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; concatenate %vec with itself into a <4 x i32>
+ %4 = bitcast <4 x i32> %3 to <8 x i16> ; same bits viewed as eight i16 lanes
+ %5 = shufflevector <8 x i16> %2, <8 x i16> %4, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13> ; lanes 0-1 from %2, lanes 2-7 are elements 0-5 of %4
+ store <8 x i16> %5, ptr %ptr1, align 16
+ ret void
+}
+
+define void @test_none_v2i64(ptr %ptr, i32 %v1, <2 x i32> %vec) local_unnamed_addr #0 { ; shuffles a <2 x i32> loaded from %ptr with %vec after %v1 replaces its element 0; attribute group #0 is defined outside this excerpt
+; CHECK-LE-P8-LABEL: test_none_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha
+; CHECK-LE-P8-NEXT: mtvsrwz v4, r4
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+;
+; CHECK-LE-P9-LABEL: test_none_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-LE-P9-NEXT: mtfprwz f0, r4
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-LE-P9-NEXT: xxinsertw v2, vs0, 12
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: stxv v2, 0(r3)
+;
+; CHECK-BE-P8-LABEL: test_none_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r4
+; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI4_1@toc@ha
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l
+; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI4_1@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r3
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
+;
+; CHECK-BE-P9-LABEL: test_none_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r3)
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-BE-P9-NEXT: mtfprwz f0, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-P9-NEXT: xxinsertw v2, vs0, 0
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: stxv v2, 0(r3)
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C2(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r4
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r3)
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-64-P9-NEXT: xxinsertw v2, vs0, 0
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C2(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v5, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vperm v2, v5, v2, v4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: mtfprwz f0, r4
+; CHECK-AIX-32-P9-NEXT: xxinsertw v2, vs0, 0
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+entry:
+ %0 = load <2 x i32>, ptr %ptr, align 4 ; the ordinary vector input
+ %tmp = insertelement <2 x i32> %vec, i32 %v1, i32 0 ; overwrite element 0 of the %vec argument with the scalar %v1
+ %1 = shufflevector <2 x i32> %0, <2 x i32> %tmp, <4 x i32> <i32 3, i32 2, i32 2, i32 0> ; result = { %tmp[1], %tmp[0], %tmp[0], %0[0] }
+ store <4 x i32> %1, ptr undef, align 4 ; destination pointer is undef
+ unreachable ; no return here; consistent with the CHECK blocks above containing no blr
+}
+
+define void @test_v2i64_none() { ; spreads a <2 x i32> loaded via an undef pointer over the even lanes of a <4 x i32>
+; CHECK-LE-P8-LABEL: test_v2i64_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxspltw vs0, vs0, 1
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs0, 0, r3
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs0
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i32>, ptr undef, align 4 ; the only defined input; source pointer is undef
+ %1 = shufflevector <2 x i32> %0, <2 x i32> undef, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; indices 2 and 3 select the undef operand, so result = { %0[0], undef, %0[1], undef }
+ store <4 x i32> %1, ptr undef, align 4 ; destination pointer is undef
+ ret void
+}
+
+define void @test_v8i16_v8i16(ptr %a) { ; interleaves two vectors that are each an i16 placed in lane 0 of an undef <8 x i16>
+; CHECK-LE-P8-LABEL: test_v8i16_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: mtfprd f0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-LE-P9-NEXT: lxsihzx f1, 0, r3
+; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT: mtfprwz f0, r4
+; CHECK-BE-P8-NEXT: mtfprwz f1, r3
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-BE-P9-NEXT: lxsihzx f1, 0, r3
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: mtfprwz f0, r4
+; CHECK-AIX-64-P8-NEXT: mtfprwz f1, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxsihzx f1, 0, r3
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: mtfprwz f0, r4
+; CHECK-AIX-32-P8-NEXT: mtfprwz f1, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsihzx f0, 0, r3
+; CHECK-AIX-32-P9-NEXT: lxsihzx f1, 0, r3
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr undef, align 1 ; first scalar: two bytes read through an undef pointer
+ %tmp0_1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16
+ %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 ; scalar lands in lane 0, the other seven lanes stay undef
+ %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> ; same bits viewed as <4 x i32>
+ %1 = load <2 x i8>, ptr %a, align 1 ; second scalar: two bytes read from %a
+ %tmp1_1 = bitcast <2 x i8> %1 to i16 ; reinterpret the byte pair as a single i16
+ %tmp1_2 = insertelement <8 x i16> undef, i16 %tmp1_1, i32 0 ; built the same way as the first operand
+ %tmp1_3 = bitcast <8 x i16> %tmp1_2 to <4 x i32> ; same bits viewed as <4 x i32>
+ %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave elements 0 and 1 of both operands, first operand leading
+ store <4 x i32> %2, ptr undef, align 4 ; destination pointer is undef
+ ret void
+}
+
+define void @test_v8i16_v4i32(ptr %a) { ; interleaves an i16-in-lane-0 <8 x i16> (bitcast to <4 x i32>) with an i32-in-lane-0 <4 x i32>
+; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, f0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT: xxswapd vs0, f0
+; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: sldi r3, r4, 48
+; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P8-NEXT: mtfprd f1, r3
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: xxmrghw vs0, v2, vs0
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, v2, vs0
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr undef, align 1 ; first scalar: two bytes read through an undef pointer
+ %tmp0_1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16
+ %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 ; scalar lands in lane 0, the other seven lanes stay undef
+ %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> ; same bits viewed as <4 x i32>
+ %1 = load <2 x i16>, ptr %a, align 4 ; second scalar: four bytes read from %a
+ %tmp1_1 = bitcast <2 x i16> %1 to i32 ; reinterpret the i16 pair as a single i32
+ %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 ; i32 lands in lane 0, the other three lanes stay undef
+ %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave elements 0 and 1 of both operands, first operand leading
+ store <4 x i32> %2, ptr undef, align 4 ; destination pointer is undef
+ ret void
+}
+
+define void @test_v8i16_v2i64(ptr %a) { ; interleaves an i16-in-lane-0 <8 x i16> (bitcast to <4 x i32>) with an i32-in-lane-0 <4 x i32> whose source load is 8-byte aligned
+; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, f0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-LE-P9-NEXT: xxswapd vs0, f0
+; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, v2
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-BE-P8-NEXT: sldi r3, r4, 48
+; CHECK-BE-P8-NEXT: mtfprd f1, r3
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: xxmrghw vs0, v2, vs0
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, v2, vs0
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr undef, align 1 ; first scalar: two bytes read through an undef pointer
+ %tmp0_1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16
+ %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 ; scalar lands in lane 0, the other seven lanes stay undef
+ %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> ; same bits viewed as <4 x i32>
+ %1 = load <2 x i16>, ptr %a, align 8 ; NOTE(review): still a 4-byte load; only the align 8 differs from test_v8i16_v4i32 and no i64 value appears despite the function name - confirm this is intended
+ %tmp1_1 = bitcast <2 x i16> %1 to i32 ; reinterpret the i16 pair as a single i32
+ %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 ; i32 lands in lane 0, the other three lanes stay undef
+ %2 = shufflevector <4 x i32> %tmp0_3, <4 x i32> %tmp1_2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave elements 0 and 1 of both operands, first operand leading
+ store <4 x i32> %2, ptr undef, align 4 ; destination pointer is undef
+ ret void
+}
+
+define <16 x i8> @test_v4i32_v4i32(ptr %a, ptr %b) { ; builds a <16 x i8> from two 4-byte loads using two concatenating shuffles
+; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha
+; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha
+; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha
+; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-BE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha
+; CHECK-BE-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C4(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C4(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %load1 = load <4 x i8>, ptr %a ; first 4-byte input
+ %load2 = load <4 x i8>, ptr %b ; second 4-byte input
+ %shuffle1 = shufflevector <4 x i8> %load1, <4 x i8> %load2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; mask 0..7: %load1 followed by %load2
+ %shuffle2 = shufflevector <8 x i8> %shuffle1, <8 x i8> %shuffle1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; mask 0..15: %shuffle1 followed by a second copy of itself
+ ret <16 x i8> %shuffle2
+}
+
+define void @test_v4i32_v8i16(ptr %a) { ; interleaves an i32 inserted into lane 0 (first operand) with an i16 inserted into lane 0 (second operand)
+; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, f0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT: xxswapd vs0, f0
+; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: sldi r3, r4, 48
+; CHECK-BE-P8-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P8-NEXT: mtfprd f1, r3
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P9-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: xxsldwi vs0, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr undef, align 1 ; two bytes read through an undef pointer
+ %tmp0_1 = bitcast <2 x i8> %0 to i16 ; treat the two bytes as one i16 scalar
+ %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 ; only lane 0 is defined
+ %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> ; reinterpret so both shuffle operands are <4 x i32>
+ %1 = load <2 x i16>, ptr %a, align 4 ; four bytes read from %a
+ %tmp1_1 = bitcast <2 x i16> %1 to i32 ; treat the four bytes as one i32 scalar
+ %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 ; only lane 0 is defined
+ %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave lanes 0-1 of %tmp1_2 with lanes 0-1 of %tmp0_3
+ store <4 x i32> %2, ptr undef, align 4 ; result is stored through an undef pointer
+ ret void
+}
+
+define void @test_v4i32_v2i64(ptr %a) { ; interleaves two lane-0 i32 insertions; the value loaded from %a is the first shuffle operand
+; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd vs0, f0
+; CHECK-LE-P8-NEXT: xxswapd vs1, f1
+; CHECK-LE-P8-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-LE-P9-NEXT: xxswapd vs0, f0
+; CHECK-LE-P9-NEXT: xxswapd vs1, f1
+; CHECK-LE-P9-NEXT: xxmrglw vs0, vs0, vs1
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-BE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-BE-P8-NEXT: xxsldwi vs1, f1, f1, 1
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-BE-P9-NEXT: xxsldwi vs1, f1, f1, 1
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: xxsldwi vs1, f1, f1, 1
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxsldwi vs1, f1, f1, 1
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i16>, ptr undef, align 8 ; four bytes read through an undef pointer; NOTE(review): name says v2i64 but only i32 lanes are built here - confirm intent
+ %tmp0_1 = bitcast <2 x i16> %0 to i32 ; treat the four bytes as one i32 scalar
+ %tmp0_2 = insertelement <4 x i32> undef, i32 %tmp0_1, i32 0 ; only lane 0 is defined
+ %1 = load <2 x i16>, ptr %a, align 4 ; four bytes read from %a
+ %tmp1_1 = bitcast <2 x i16> %1 to i32 ; treat the four bytes as one i32 scalar
+ %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 ; only lane 0 is defined
+ %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave lanes 0-1 of %tmp1_2 with lanes 0-1 of %tmp0_2
+ store <4 x i32> %2, ptr undef, align 4 ; result is stored through an undef pointer
+ ret void
+}
+
+define void @test_v2i64_v2i64(ptr %a) { ; interleaves two <2 x i32> loads into one <4 x i32>
+; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-LE-P8-NEXT: lfdx f1, 0, r3
+; CHECK-LE-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-LE-P9-NEXT: lfd f1, 0(r3)
+; CHECK-LE-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-BE-P8-NEXT: lfdx f1, 0, r3
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-BE-P9-NEXT: lfd f1, 0(r3)
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lfdx f1, 0, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: lfd f1, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, 4(r3)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -16
+; CHECK-AIX-32-P8-NEXT: stw r5, -16(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: lfiwzx f2, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: xxspltw vs1, vs2, 1
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3)
+; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: lxvwsx vs1, 0, r3
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i32>, ptr undef, align 4 ; eight bytes read through an undef pointer
+ %1 = load <2 x i32>, ptr %a, align 4 ; eight bytes read from %a
+ %2 = shufflevector <2 x i32> %0, <2 x i32> %1, <4 x i32> <i32 0, i32 2, i32 1, i32 3> ; result is %0[0], %1[0], %0[1], %1[1]
+ store <4 x i32> %2, ptr undef, align 4 ; result is stored through an undef pointer
+ ret void
+}
+
+define void @test_v2i64_v4i32(ptr %a) { ; interleaves two lane-0 i32 insertions; the value loaded from %a is the second shuffle operand
+; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-LE-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd vs0, f0
+; CHECK-LE-P8-NEXT: xxswapd vs1, f1
+; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-LE-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-LE-P9-NEXT: xxswapd vs0, f0
+; CHECK-LE-P9-NEXT: xxswapd vs1, f1
+; CHECK-LE-P9-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-BE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-BE-P8-NEXT: xxsldwi vs1, f1, f1, 1
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-BE-P9-NEXT: xxsldwi vs1, f1, f1, 1
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: xxsldwi vs1, f1, f1, 1
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lfiwzx f1, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: xxsldwi vs1, f1, f1, 1
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lwz r4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i16>, ptr undef, align 8 ; four bytes read through an undef pointer; NOTE(review): name says v2i64 but only i32 lanes are built here - confirm intent
+ %tmp0_1 = bitcast <2 x i16> %0 to i32 ; treat the four bytes as one i32 scalar
+ %tmp0_2 = insertelement <4 x i32> undef, i32 %tmp0_1, i32 0 ; only lane 0 is defined
+ %1 = load <2 x i16>, ptr %a, align 4 ; four bytes read from %a
+ %tmp1_1 = bitcast <2 x i16> %1 to i32 ; treat the four bytes as one i32 scalar
+ %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 ; only lane 0 is defined
+ %2 = shufflevector <4 x i32> %tmp0_2, <4 x i32> %tmp1_2, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave lanes 0-1 of %tmp0_2 with lanes 0-1 of %tmp1_2
+ store <4 x i32> %2, ptr undef, align 4 ; result is stored through an undef pointer
+ ret void
+}
+
+define void @test_v2i64_v8i16(ptr %a) { ; interleaves an i32 inserted into lane 0 (first operand, loaded with align 8) with an i16 inserted into lane 0 (second operand)
+; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-LE-P8-NEXT: mtfprd f1, r4
+; CHECK-LE-P8-NEXT: xxswapd vs0, f0
+; CHECK-LE-P8-NEXT: xxswapd vs1, vs1
+; CHECK-LE-P8-NEXT: xxmrglw vs0, vs1, vs0
+; CHECK-LE-P8-NEXT: xxswapd vs0, vs0
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-LE-P9-NEXT: xxswapd vs0, f0
+; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-LE-P9-NEXT: xxmrglw vs0, v2, vs0
+; CHECK-LE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lhz r4, 0(r3)
+; CHECK-BE-P8-NEXT: lfdx f0, 0, r3
+; CHECK-BE-P8-NEXT: sldi r3, r4, 48
+; CHECK-BE-P8-NEXT: mtfprd f1, r3
+; CHECK-BE-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-BE-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lfd f0, 0(r3)
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: xxmrghw vs0, vs0, v2
+; CHECK-BE-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lfdx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: sldi r3, r4, 48
+; CHECK-AIX-64-P8-NEXT: mtfprd f1, r3
+; CHECK-AIX-64-P8-NEXT: xxmrghw vs0, vs0, vs1
+; CHECK-AIX-64-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfd f0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: xxmrghw vs0, vs0, v2
+; CHECK-AIX-64-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: stxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r4, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -16(r1)
+; CHECK-AIX-32-P9-NEXT: xxmrghw vs0, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: stxv vs0, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr undef, align 1 ; two bytes read through an undef pointer
+ %tmp0_1 = bitcast <2 x i8> %0 to i16 ; treat the two bytes as one i16 scalar
+ %tmp0_2 = insertelement <8 x i16> undef, i16 %tmp0_1, i32 0 ; only lane 0 is defined
+ %tmp0_3 = bitcast <8 x i16> %tmp0_2 to <4 x i32> ; reinterpret so both shuffle operands are <4 x i32>
+ %1 = load <2 x i16>, ptr %a, align 8 ; four bytes read from %a, declared 8-byte aligned
+ %tmp1_1 = bitcast <2 x i16> %1 to i32 ; treat the four bytes as one i32 scalar
+ %tmp1_2 = insertelement <4 x i32> undef, i32 %tmp1_1, i32 0 ; only lane 0 is defined
+ %2 = shufflevector <4 x i32> %tmp1_2, <4 x i32> %tmp0_3, <4 x i32> <i32 0, i32 4, i32 1, i32 5> ; interleave lanes 0-1 of %tmp1_2 with lanes 0-1 of %tmp0_3
+ store <4 x i32> %2, ptr undef, align 4 ; result is stored through an undef pointer
+ ret void
+}
diff --git a/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
new file mode 100644
index 0000000000000..933b7860de5c8
--- /dev/null
+++ b/llvm/test/CodeGen/PowerPC/v8i16_scalar_to_vector_shuffle.ll
@@ -0,0 +1,1554 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64le-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-LE-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-unknown-linux-gnu < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-BE-P9
+
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc64-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-64-P9
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P8
+; RUN: llc -verify-machineinstrs -mcpu=pwr9 -ppc-asm-full-reg-names \
+; RUN: -ppc-vsr-nums-as-vr -mtriple=powerpc-ibm-aix < %s | \
+; RUN: FileCheck %s --check-prefix=CHECK-AIX-32-P9
+
+define void @test_none_v8i16(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %arg) { ; shuffles %a (byte 0 replaced by %arg) with %b (lane 0 replaced by a loaded i16); %a1 is unused
+; CHECK-LE-P8-LABEL: test_none_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: mtvsrd v4, r3
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI0_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI0_0@toc@ha
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI0_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C0(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C0(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C0(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C0(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %load0.tmp = load <2 x i8>, ptr %a0 ; two bytes read from %a0
+ %load0.tmp1 = bitcast <2 x i8> %load0.tmp to i16 ; treat the two bytes as one i16 scalar
+ %load0 = insertelement <8 x i16> %b, i16 %load0.tmp1, i64 0 ; %b with lane 0 replaced by the loaded i16
+ %load1.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 ; %a with byte 0 replaced by %arg
+ %load1 = bitcast <16 x i8> %load1.tmp to <8 x i16> ; reinterpret so both shuffle operands are <8 x i16>
+ %shuff = shufflevector <8 x i16> %load0, <8 x i16> %load1, <8 x i32> <i32 9, i32 0, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; lane 0 = %load1[1], lane 1 = %load0[0], remaining lanes undef
+ store <8 x i16> %shuff, ptr undef ; result is stored through an undef pointer
+ ret void
+}
+
+define void @test_v8i16_none(ptr %a0, ptr %a1, <16 x i8> %a, <8 x i16> %b, i8 %arg) { ; places a loaded i16 in lane 0 and fills lanes 1-7 from %a (byte 0 replaced by %arg); %a1 is unused
+; CHECK-LE-P8-LABEL: test_v8i16_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P8-NEXT: mtvsrd v4, r9
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: mtvsrd v4, r3
+; CHECK-LE-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-LE-P9-NEXT: mtvsrwz v4, r9
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-LE-P9-NEXT: vinsertb v2, v4, 15
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI1_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r9
+; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI1_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI1_1@toc@ha
+; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI1_1@toc@l
+; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r3
+; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI1_0@toc@ha
+; CHECK-BE-P9-NEXT: mtvsrwz v4, r9
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI1_0@toc@l
+; CHECK-BE-P9-NEXT: vinsertb v2, v4, 0
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C1(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r5
+; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C2(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C1(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v4, r5
+; CHECK-AIX-64-P9-NEXT: vinsertb v2, v4, 0
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C1(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r5
+; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C2(r2) # %const.1
+; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C1(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: mtvsrwz v4, r5
+; CHECK-AIX-32-P9-NEXT: vinsertb v2, v4, 0
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %load0.tmp = load <2 x i8>, ptr %a0 ; two bytes read from %a0
+ %load0.tmp1 = bitcast <2 x i8> %load0.tmp to i16 ; treat the two bytes as one i16 scalar
+ %load0 = insertelement <8 x i16> %b, i16 %load0.tmp1, i64 0 ; %b with lane 0 replaced by the loaded i16
+ %load1.tmp = insertelement <16 x i8> %a, i8 %arg, i32 0 ; %a with byte 0 replaced by %arg
+ %load1 = bitcast <16 x i8> %load1.tmp to <8 x i16> ; reinterpret so both shuffle operands are <8 x i16>
+ %shuff = shufflevector <8 x i16> %load0, <8 x i16> %load1, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14> ; lane 0 = %load0[0], lanes 1-7 = %load1[0..6]
+ store <8 x i16> %shuff, ptr undef ; result is stored through an undef pointer
+ ret void
+}
+
+; Shuffle whose left input is a <2 x i16> loaded from %ptr and whose right input
+; is %v3 inserted into lane 0 of an undef <4 x i8>, bitcast to <2 x i16>. The
+; <4 x i16> result (mask <1, 0, 3, 2>) is stored to an undef pointer; %ptr2 is
+; not referenced by the body.
+define void @test_none_v4i32(ptr %ptr, ptr %ptr2, i8 %v3) local_unnamed_addr #0 {
+; CHECK-LE-P8-LABEL: test_none_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P8-NEXT: mtvsrd v3, r5
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-LE-P8-NEXT: xxswapd v2, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stfdx f0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-LE-P9-NEXT: mtvsrd v3, r5
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l
+; CHECK-LE-P9-NEXT: vperm v3, v3, v3, v4
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: xxswapd vs0, v2
+; CHECK-LE-P9-NEXT: stfd f0, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r5
+; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI2_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI2_1@toc@ha
+; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI2_1@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-BE-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P8-NEXT: stxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_0@toc@ha
+; CHECK-BE-P9-NEXT: mtvsrwz v3, r5
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI2_1@toc@ha
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI2_1@toc@l
+; CHECK-BE-P9-NEXT: vperm v3, v3, v3, v4
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: stxsd v2, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C3(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v3, r5
+; CHECK-AIX-64-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C4(r2) # %const.1
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v3, v2
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P8-NEXT: stxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C2(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: mtvsrwz v3, r5
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C3(r2) # %const.1
+; CHECK-AIX-64-P9-NEXT: vperm v3, v3, v3, v4
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: stxsd v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: stb r5, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r3
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: vmrghh v3, v3, v3
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, -12(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lwz r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: stw r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C2(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: stb r5, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vmrghh v3, v3, v3
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: stxv v2, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, -12(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lwz r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i16>, ptr %ptr, align 4
+ %tmp = insertelement <4 x i8> undef, i8 %v3, i32 0
+ %tmp0 = bitcast <4 x i8> %tmp to <2 x i16>
+ %1 = shufflevector <2 x i16> %0, <2 x i16> %tmp0, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+ store <4 x i16> %1, ptr undef, align 4
+ ret void
+}
+
+; Loads a <2 x i16> from each of %ptr1 and %ptr2, but the shuffle mask
+; <1, 0, 1, 0> only selects lanes of the first load. The <4 x i16> result is
+; zero-extended to <4 x i32> and stored to an undef pointer.
+define void @test_v4i32_none(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) {
+; CHECK-LE-P8-LABEL: test_v4i32_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v3, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI3_0@toc@ha
+; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI3_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI3_0@toc@ha
+; CHECK-BE-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI3_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v3, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C5(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C4(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, L..C4(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C3(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i16>, ptr %ptr1, align 1
+ %1 = load <2 x i16>, ptr %ptr2, align 1
+ %shuffle1 = shufflevector <2 x i16> %0, <2 x i16> %1, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
+ %2 = zext <4 x i16> %shuffle1 to <4 x i32>
+ store <4 x i32> %2, ptr undef, align 16
+ ret void
+}
+
+; Shuffles a <4 x i16> loaded from %ptr1 with a <4 x i32> loaded from %ptr2 and
+; truncated to <4 x i16> (mask <4, 5, 1, 0>). The result is zero-extended to
+; <4 x i32> and stored to an undef pointer.
+define void @test_none_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) {
+; CHECK-LE-P8-LABEL: test_none_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI4_1@toc@ha
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI4_1@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs1, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: xxswapd v4, vs1
+; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_none_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-LE-P9-NEXT: lxv v3, 0(r4)
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI4_1@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI4_1@toc@l
+; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P9-NEXT: lxv v3, 0(r3)
+; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_none_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI4_0@toc@ha
+; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI4_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_none_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI4_0@toc@ha
+; CHECK-BE-P9-NEXT: lxv v3, 0(r4)
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI4_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_none_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C6(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_none_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C5(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_none_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C5(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_none_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C4(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r4)
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <4 x i16>, ptr %ptr1, align 1
+ %1 = load <4 x i32>, ptr %ptr2, align 1
+ %bc = trunc <4 x i32> %1 to <4 x i16>
+ %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> %bc, <4 x i32> <i32 4, i32 5, i32 1, i32 0>
+ %2 = zext <4 x i16> %shuffle1 to <4 x i32>
+ store <4 x i32> %2, ptr undef, align 16
+ ret void
+}
+
+; Reverses the lanes of a <4 x i16> loaded from %ptr1 (mask <3, 2, 1, 0>, with
+; an undef second shuffle operand), zero-extends the result to <4 x i32> and
+; stores it to an undef pointer.
+define void @test_v2i64_none(ptr nocapture readonly %ptr1) {
+; CHECK-LE-P8-LABEL: test_v2i64_none:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_none:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v3, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_none:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r4, r2, .LCPI5_0@toc@ha
+; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-P8-NEXT: addi r4, r4, .LCPI5_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-BE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_none:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI5_0@toc@ha
+; CHECK-BE-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI5_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v3, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-BE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_none:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r4, L..C7(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_none:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C6(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-AIX-64-P9-NEXT: lxv v3, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_none:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r4, 4(r3)
+; CHECK-AIX-32-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-AIX-32-P8-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs0, 0, r3
+; CHECK-AIX-32-P8-NEXT: lwz r3, L..C6(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxvw4x vs1, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_none:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lwz r4, 4(r3)
+; CHECK-AIX-32-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-AIX-32-P9-NEXT: stw r4, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: lxv vs0, -16(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C5(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxv vs1, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: xxmrghw v2, vs1, vs0
+; CHECK-AIX-32-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <4 x i16>, ptr %ptr1, align 1
+ %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+ %1 = zext <4 x i16> %shuffle1 to <4 x i32>
+ store <4 x i32> %1, ptr undef, align 16
+ ret void
+}
+
+; Both shuffle inputs are <2 x i8> loads (from %a and %b). The first shuffle
+; (mask <0, 1, 2, 3, 0, 1, 2, 3>) concatenates the two loads twice into an
+; <8 x i8>; the second concatenates that vector with itself to form the
+; returned <16 x i8>.
+define <16 x i8> @test_v8i16_v8i16(ptr %a, ptr %b) {
+; CHECK-LE-P8-LABEL: test_v8i16_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: lhz r4, 0(r4)
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: mtvsrd v2, r3
+; CHECK-LE-P8-NEXT: mtvsrd v4, r4
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-LE-P9-NEXT: lxsihzx v3, 0, r4
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI6_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI6_0@toc@ha
+; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT: lhz r4, 0(r4)
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI6_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v2, 0, r5
+; CHECK-BE-P8-NEXT: mtvsrwz v3, r3
+; CHECK-BE-P8-NEXT: mtvsrwz v4, r4
+; CHECK-BE-P8-NEXT: vperm v2, v3, v4, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI6_0@toc@ha
+; CHECK-BE-P9-NEXT: lxsihzx v3, 0, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI6_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C8(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lhz r4, 0(r4)
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-64-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-AIX-64-P8-NEXT: mtvsrwz v4, r4
+; CHECK-AIX-64-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C7(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxsihzx v3, 0, r4
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C7(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: lhz r4, 0(r4)
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v2, r3
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r5
+; CHECK-AIX-32-P8-NEXT: mtvsrwz v4, r4
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v4, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C6(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsihzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %load1 = load <2 x i8>, ptr %a
+ %load2 = load <2 x i8>, ptr %b
+ %shuffle1 = shufflevector <2 x i8> %load1, <2 x i8> %load2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+ %shuffle2 = shufflevector <8 x i8> %shuffle1, <8 x i8> %shuffle1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+ ret <16 x i8> %shuffle2
+}
+
+; Both inputs are <2 x i8> loads bitcast to i16 and inserted into lane 0 of an
+; <8 x i16> (a poison vector for %a, an undef vector for %b); the load from %b
+; is marked align 4. The two vectors are bitcast to <16 x i8> and shuffled so
+; that bytes 0-1 come from the first and bytes 2-9 from the second (elements
+; 16-23 of the mask); the remaining six result bytes are undef.
+define <16 x i8> @test_v8i16_v4i32(ptr %a, ptr %b) local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_v8i16_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v3, f0
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4
+; CHECK-LE-P9-NEXT: xxswapd v3, f0
+; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4
+; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT: sldi r3, r3, 48
+; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4
+; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4
+; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4
+; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr %a
+ %bc1 = bitcast <2 x i8> %0 to i16
+ %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0
+ %1 = load <2 x i8>, ptr %b, align 4
+ %bc2 = bitcast <2 x i8> %1 to i16
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
+ %2 = bitcast <8 x i16> %vecinit3 to <16 x i8>
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i8> %shuffle
+}
+
+; Both inputs are <2 x i8> loads bitcast to i16 and inserted into lane 0 of an
+; <8 x i16> (a poison vector for %a, an undef vector for %b); the load from %b
+; is marked align 8. The two vectors are bitcast to <16 x i8> and shuffled so
+; that bytes 0-1 come from the first and bytes 2-9 from the second (elements
+; 16-23 of the mask); the remaining six result bytes are undef.
+define <16 x i8> @test_v8i16_v2i64(ptr %a, ptr %b) local_unnamed_addr {
+; CHECK-LE-P8-LABEL: test_v8i16_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v3, f0
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v8i16_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
+; CHECK-LE-P9-NEXT: xxswapd v3, f0
+; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v8i16_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT: lxsdx v2, 0, r4
+; CHECK-BE-P8-NEXT: sldi r3, r3, 48
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v8i16_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v8i16_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr %a
+ %bc1 = bitcast <2 x i8> %0 to i16
+ %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0
+ %1 = load <2 x i8>, ptr %b, align 8
+ %bc2 = bitcast <2 x i8> %1 to i16
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0
+ %2 = bitcast <8 x i16> %vecinit3 to <16 x i8>
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8>
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+ ret <16 x i8> %shuffle
+}
+
+; Shuffles two <2 x i16> loads (from %ptr1 and %ptr2) with mask <2, 3, 1, 0>,
+; so both inputs contribute lanes. The <4 x i16> result is zero-extended to
+; <4 x i32> and stored to an undef pointer.
+define void @test_v4i32_v4i32(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) {
+; CHECK-LE-P8-LABEL: test_v4i32_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha
+; CHECK-LE-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI9_1@toc@ha
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI9_1@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha
+; CHECK-LE-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI9_1@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI9_1@toc@l
+; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P9-NEXT: lxv v3, 0(r3)
+; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI9_0@toc@ha
+; CHECK-BE-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI9_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI9_0@toc@ha
+; CHECK-BE-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI9_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C9(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C8(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C8(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C7(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i16>, ptr %ptr1, align 1
+ %1 = load <2 x i16>, ptr %ptr2, align 1
+ %shuffle1 = shufflevector <2 x i16> %0, <2 x i16> %1, <4 x i32> <i32 2, i32 3, i32 1, i32 0>
+ %2 = zext <4 x i16> %shuffle1 to <4 x i32>
+ store <4 x i32> %2, ptr undef, align 16
+ ret void
+}
+
+define <16 x i8> @test_v4i32_v8i16(ptr %a, ptr %b) local_unnamed_addr { ; byte shuffle of two <8 x i16> vectors that each have only lane 0 inserted
+; CHECK-LE-P8-LABEL: test_v4i32_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v3, f0
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r4
+; CHECK-LE-P9-NEXT: xxswapd v3, f0
+; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r4
+; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT: sldi r3, r3, 48
+; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r4
+; CHECK-BE-P9-NEXT: xxsldwi v3, f0, f0, 1
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r4
+; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r4
+; CHECK-AIX-64-P9-NEXT: xxsldwi v3, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr %a ; byte pair from %a, no explicit alignment
+ %bc1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16
+ %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 ; only lane 0 is defined, the base vector is poison
+ %1 = load <2 x i8>, ptr %b, align 4 ; byte pair from %b, declared 4-byte aligned
+ %bc2 = bitcast <2 x i8> %1 to i16 ; reinterpret the byte pair as a single i16
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 ; only lane 0 is defined, the base vector is undef
+ %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> ; byte view of the vector built from %a
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> ; byte view of the vector built from %b
+ %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = bytes 0-1 of %3; bytes 2-9 = bytes 0-7 of %2 (mask indices 16-23); bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v4i32_v2i64(ptr %a, ptr %b) local_unnamed_addr { ; byte shuffle of two <8 x i16> vectors that each have only lane 0 inserted; the %a-derived vector is the first shuffle operand
+; CHECK-LE-P8-LABEL: test_v4i32_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P8-NEXT: lfdx f1, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, f0
+; CHECK-LE-P8-NEXT: xxswapd v3, f1
+; CHECK-LE-P8-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v4i32_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, f0
+; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
+; CHECK-LE-P9-NEXT: xxswapd v3, f0
+; CHECK-LE-P9-NEXT: vmrglh v2, v3, v2
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v4i32_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v4i32_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-BE-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C9(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v4i32_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C8(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr %a, align 4 ; byte pair from %a, declared 4-byte aligned
+ %bc1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16
+ %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 ; only lane 0 is defined, the base vector is poison
+ %1 = load <2 x i8>, ptr %b, align 8 ; byte pair from %b, declared 8-byte aligned
+ %bc2 = bitcast <2 x i8> %1 to i16 ; reinterpret the byte pair as a single i16
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 ; only lane 0 is defined, the base vector is undef
+ %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> ; byte view of the vector built from %a
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> ; byte view of the vector built from %b
+ %shuffle = shufflevector <16 x i8> %2, <16 x i8> %3, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = bytes 0-1 of %2; bytes 2-9 = bytes 0-7 of %3 (mask indices 16-23); bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define void @test_v2i64_v2i64(ptr nocapture readonly %ptr1, ptr nocapture readonly %ptr2) { ; shuffle two <4 x i16> loads, zero-extend the result to <4 x i32>, and store it
+; CHECK-LE-P8-LABEL: test_v2i64_v2i64:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: addis r5, r2, .LCPI12_0@toc@ha
+; CHECK-LE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-LE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-LE-P8-NEXT: addis r3, r2, .LCPI12_1@toc@ha
+; CHECK-LE-P8-NEXT: addi r5, r5, .LCPI12_0@toc@l
+; CHECK-LE-P8-NEXT: addi r3, r3, .LCPI12_1@toc@l
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r5
+; CHECK-LE-P8-NEXT: xxswapd v4, vs0
+; CHECK-LE-P8-NEXT: lxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P8-NEXT: xxswapd v3, vs0
+; CHECK-LE-P8-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P8-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P8-NEXT: xxswapd vs0, v2
+; CHECK-LE-P8-NEXT: stxvd2x vs0, 0, r3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v2i64:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha
+; CHECK-LE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l
+; CHECK-LE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-LE-P9-NEXT: addis r3, r2, .LCPI12_1@toc@ha
+; CHECK-LE-P9-NEXT: addi r3, r3, .LCPI12_1@toc@l
+; CHECK-LE-P9-NEXT: vperm v2, v2, v3, v4
+; CHECK-LE-P9-NEXT: lxv v3, 0(r3)
+; CHECK-LE-P9-NEXT: xxlxor v4, v4, v4
+; CHECK-LE-P9-NEXT: vperm v2, v4, v2, v3
+; CHECK-LE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v2i64:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: addis r5, r2, .LCPI12_0@toc@ha
+; CHECK-BE-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-BE-P8-NEXT: addi r5, r5, .LCPI12_0@toc@l
+; CHECK-BE-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-BE-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v2i64:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-BE-P9-NEXT: addis r3, r2, .LCPI12_0@toc@ha
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: addi r3, r3, .LCPI12_0@toc@l
+; CHECK-BE-P9-NEXT: lxv v4, 0(r3)
+; CHECK-BE-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-BE-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: stxv v2, 0(r3)
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: ld r5, L..C10(r2) # %const.0
+; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-64-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsd v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: ld r3, L..C9(r2) # %const.0
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-64-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-64-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C10(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: stxvw4x v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v2i64:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C9(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT: xxlxor v3, v3, v3
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: stxv v2, 0(r3)
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <4 x i16>, ptr %ptr1, align 1 ; four i16 lanes from %ptr1, byte-aligned
+ %1 = load <4 x i16>, ptr %ptr2, align 1 ; four i16 lanes from %ptr2, byte-aligned
+ %shuffle1 = shufflevector <4 x i16> %0, <4 x i16> %1, <4 x i32> <i32 4, i32 5, i32 1, i32 0> ; result = { %1[0], %1[1], %0[1], %0[0] }
+ %2 = zext <4 x i16> %shuffle1 to <4 x i32> ; widen each shuffled lane to 32 bits
+ store <4 x i32> %2, ptr undef, align 16 ; destination pointer is undef, so the store address carries no meaning
+ ret void
+}
+
+define <16 x i8> @test_v2i64_v4i32(ptr %a, ptr %b) local_unnamed_addr { ; byte shuffle of two <8 x i16> vectors that each have only lane 0 inserted; the %b-derived vector is the first shuffle operand
+; CHECK-LE-P8-LABEL: test_v2i64_v4i32:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P8-NEXT: lfdx f1, 0, r4
+; CHECK-LE-P8-NEXT: xxswapd v2, f0
+; CHECK-LE-P8-NEXT: xxswapd v3, f1
+; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v4i32:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-LE-P9-NEXT: xxswapd v2, f0
+; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
+; CHECK-LE-P9-NEXT: xxswapd v3, f0
+; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v4i32:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-BE-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-BE-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v4i32:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P8-NEXT: lxsdx v3, 0, r4
+; CHECK-AIX-64-P8-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lfiwzx f0, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: xxsldwi v2, f0, f0, 1
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lwz r5, L..C11(r2) # %const.0
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P8-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P8-NEXT: lxvw4x v4, 0, r5
+; CHECK-AIX-32-P8-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v4i32:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v2, 0, r3
+; CHECK-AIX-32-P9-NEXT: lwz r3, L..C10(r2) # %const.0
+; CHECK-AIX-32-P9-NEXT: lxsiwzx v3, 0, r4
+; CHECK-AIX-32-P9-NEXT: lxv v4, 0(r3)
+; CHECK-AIX-32-P9-NEXT: vperm v2, v3, v2, v4
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr %a, align 4 ; byte pair from %a, declared 4-byte aligned
+ %bc1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16
+ %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 ; only lane 0 is defined, the base vector is poison
+ %1 = load <2 x i8>, ptr %b, align 8 ; byte pair from %b, declared 8-byte aligned
+ %bc2 = bitcast <2 x i8> %1 to i16 ; reinterpret the byte pair as a single i16
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 ; only lane 0 is defined, the base vector is undef
+ %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> ; byte view of the vector built from %a
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> ; byte view of the vector built from %b
+ %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = bytes 0-1 of %3; bytes 2-9 = bytes 0-7 of %2 (mask indices 16-23); bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
+
+define <16 x i8> @test_v2i64_v8i16(ptr %a, ptr %b) local_unnamed_addr { ; byte shuffle of two <8 x i16> vectors that each have only lane 0 inserted; the %b-derived vector is the first shuffle operand
+; CHECK-LE-P8-LABEL: test_v2i64_v8i16:
+; CHECK-LE-P8: # %bb.0: # %entry
+; CHECK-LE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-LE-P8-NEXT: lfdx f0, 0, r4
+; CHECK-LE-P8-NEXT: mtfprd f1, r3
+; CHECK-LE-P8-NEXT: xxswapd v3, f0
+; CHECK-LE-P8-NEXT: xxswapd v2, vs1
+; CHECK-LE-P8-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P8-NEXT: blr
+;
+; CHECK-LE-P9-LABEL: test_v2i64_v8i16:
+; CHECK-LE-P9: # %bb.0: # %entry
+; CHECK-LE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-LE-P9-NEXT: lfd f0, 0(r4)
+; CHECK-LE-P9-NEXT: xxswapd v3, f0
+; CHECK-LE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-LE-P9-NEXT: vmrglh v2, v2, v3
+; CHECK-LE-P9-NEXT: blr
+;
+; CHECK-BE-P8-LABEL: test_v2i64_v8i16:
+; CHECK-BE-P8: # %bb.0: # %entry
+; CHECK-BE-P8-NEXT: lhz r3, 0(r3)
+; CHECK-BE-P8-NEXT: lxsdx v2, 0, r4
+; CHECK-BE-P8-NEXT: sldi r3, r3, 48
+; CHECK-BE-P8-NEXT: mtvsrd v3, r3
+; CHECK-BE-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-BE-P8-NEXT: blr
+;
+; CHECK-BE-P9-LABEL: test_v2i64_v8i16:
+; CHECK-BE-P9: # %bb.0: # %entry
+; CHECK-BE-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-BE-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-BE-P9-NEXT: vsplth v2, v2, 3
+; CHECK-BE-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-BE-P9-NEXT: blr
+;
+; CHECK-AIX-64-P8-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-64-P8: # %bb.0: # %entry
+; CHECK-AIX-64-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-64-P8-NEXT: lxsdx v2, 0, r4
+; CHECK-AIX-64-P8-NEXT: sldi r3, r3, 48
+; CHECK-AIX-64-P8-NEXT: mtvsrd v3, r3
+; CHECK-AIX-64-P8-NEXT: vmrghh v2, v2, v3
+; CHECK-AIX-64-P8-NEXT: blr
+;
+; CHECK-AIX-64-P9-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-64-P9: # %bb.0: # %entry
+; CHECK-AIX-64-P9-NEXT: lxsihzx v2, 0, r3
+; CHECK-AIX-64-P9-NEXT: lxsd v3, 0(r4)
+; CHECK-AIX-64-P9-NEXT: vsplth v2, v2, 3
+; CHECK-AIX-64-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-64-P9-NEXT: blr
+;
+; CHECK-AIX-32-P8-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-32-P8: # %bb.0: # %entry
+; CHECK-AIX-32-P8-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P8-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P8-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P8-NEXT: addi r4, r1, -32
+; CHECK-AIX-32-P8-NEXT: lxvw4x v2, 0, r4
+; CHECK-AIX-32-P8-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P8-NEXT: addi r3, r1, -16
+; CHECK-AIX-32-P8-NEXT: lxvw4x v3, 0, r3
+; CHECK-AIX-32-P8-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P8-NEXT: blr
+;
+; CHECK-AIX-32-P9-LABEL: test_v2i64_v8i16:
+; CHECK-AIX-32-P9: # %bb.0: # %entry
+; CHECK-AIX-32-P9-NEXT: lhz r3, 0(r3)
+; CHECK-AIX-32-P9-NEXT: sth r3, -32(r1)
+; CHECK-AIX-32-P9-NEXT: lwz r3, 0(r4)
+; CHECK-AIX-32-P9-NEXT: lxv v2, -32(r1)
+; CHECK-AIX-32-P9-NEXT: stw r3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: lxv v3, -16(r1)
+; CHECK-AIX-32-P9-NEXT: vmrghh v2, v3, v2
+; CHECK-AIX-32-P9-NEXT: blr
+entry:
+ %0 = load <2 x i8>, ptr %a ; byte pair from %a, no explicit alignment
+ %bc1 = bitcast <2 x i8> %0 to i16 ; reinterpret the byte pair as a single i16
+ %vecinit3 = insertelement <8 x i16> poison, i16 %bc1, i64 0 ; only lane 0 is defined, the base vector is poison
+ %1 = load <2 x i8>, ptr %b, align 8 ; byte pair from %b, declared 8-byte aligned
+ %bc2 = bitcast <2 x i8> %1 to i16 ; reinterpret the byte pair as a single i16
+ %vecinit6 = insertelement <8 x i16> undef, i16 %bc2, i64 0 ; only lane 0 is defined, the base vector is undef
+ %2 = bitcast <8 x i16> %vecinit3 to <16 x i8> ; byte view of the vector built from %a
+ %3 = bitcast <8 x i16> %vecinit6 to <16 x i8> ; byte view of the vector built from %b
+ %shuffle = shufflevector <16 x i8> %3, <16 x i8> %2, <16 x i32> <i32 0, i32 1, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef> ; result bytes 0-1 = bytes 0-1 of %3; bytes 2-9 = bytes 0-7 of %2 (mask indices 16-23); bytes 10-15 undef
+ ret <16 x i8> %shuffle
+}
More information about the llvm-commits
mailing list