[llvm] afd9582 - [PowerPC] Enhance test for PR #73609. NFC.
Kai Luo via llvm-commits
llvm-commits at lists.llvm.org
Wed Nov 29 21:09:56 PST 2023
Author: Kai Luo
Date: 2023-11-30T05:06:29Z
New Revision: afd9582b36ba87b48730f95ab42b79d07c29235e
URL: https://github.com/llvm/llvm-project/commit/afd9582b36ba87b48730f95ab42b79d07c29235e
DIFF: https://github.com/llvm/llvm-project/commit/afd9582b36ba87b48730f95ab42b79d07c29235e.diff
LOG: [PowerPC] Enhance test for PR #73609. NFC.
Added:
Modified:
llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
Removed:
################################################################################
diff --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 16c2617b3564931..cc32a76b22c2872 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -1,50 +1,843 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck --check-prefix=PWR8 %s
-
-define <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p, <2 x i64> noundef %a) {
-; CHECK-LABEL: build_v2i64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lwz 3, 0(3)
-; CHECK-NEXT: li 4, 0
-; CHECK-NEXT: std 4, -8(1)
-; CHECK-NEXT: std 3, -16(1)
-; CHECK-NEXT: addi 3, 1, -16
-; CHECK-NEXT: lxvd2x 34, 0, 3
-; CHECK-NEXT: blr
-;
-; PWR8-LABEL: build_v2i64:
-; PWR8: # %bb.0: # %entry
-; PWR8-NEXT: lwz 3, 0(3)
-; PWR8-NEXT: li 4, 0
-; PWR8-NEXT: mtfprd 0, 4
-; PWR8-NEXT: mtfprd 1, 3
-; PWR8-NEXT: xxmrghd 34, 1, 0
-; PWR8-NEXT: blr
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck --check-prefix=PWR7-BE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck --check-prefix=PWR8-BE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr7 < %s | FileCheck --check-prefix=PWR7-LE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck --check-prefix=PWR8-LE %s
+
+define <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2i64_extload_0:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: std 4, -8(1)
+; PWR7-BE-NEXT: std 3, -16(1)
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvd2x 34, 0, 3
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v2i64_extload_0:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lwz 3, 0(3)
+; PWR8-BE-NEXT: li 4, 0
+; PWR8-BE-NEXT: mtfprd 0, 4
+; PWR8-BE-NEXT: mtfprd 1, 3
+; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v2i64_extload_0:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -16(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI0_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI0_0@toc@l
+; PWR7-LE-NEXT: stw 3, -32(1)
+; PWR7-LE-NEXT: addi 3, 1, -32
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -16
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v2i64_extload_0:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lwz 3, 0(3)
+; PWR8-LE-NEXT: li 4, 0
+; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
+; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT: mtfprd 0, 3
+; PWR8-LE-NEXT: mtfprd 1, 4
+; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: blr
entry:
%0 = load i32, ptr %p, align 4
%conv = zext i32 %0 to i64
- %vecinit1 = insertelement <2 x i64> <i64 poison, i64 0>, i64 %conv, i64 0
+ %vecinit1 = insertelement <2 x i64> <i64 0, i64 0>, i64 %conv, i64 0
ret <2 x i64> %vecinit1
}
-define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p, <2 x double> noundef %a) {
-; CHECK-LABEL: build_v2f64:
-; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: lfs 0, 0(3)
-; CHECK-NEXT: xxlxor 1, 1, 1
-; CHECK-NEXT: xxmrghd 34, 0, 1
-; CHECK-NEXT: blr
+define <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2i64_extload_1:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: std 4, -16(1)
+; PWR7-BE-NEXT: std 3, -8(1)
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvd2x 34, 0, 3
+; PWR7-BE-NEXT: blr
;
-; PWR8-LABEL: build_v2f64:
-; PWR8: # %bb.0: # %entry
-; PWR8-NEXT: lfs 0, 0(3)
-; PWR8-NEXT: xxlxor 1, 1, 1
-; PWR8-NEXT: xxmrghd 34, 0, 1
-; PWR8-NEXT: blr
+; PWR8-BE-LABEL: build_v2i64_extload_1:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lwz 3, 0(3)
+; PWR8-BE-NEXT: li 4, 0
+; PWR8-BE-NEXT: mtfprd 0, 4
+; PWR8-BE-NEXT: mtfprd 1, 3
+; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v2i64_extload_1:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: std 4, -16(1)
+; PWR7-LE-NEXT: std 3, -8(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v2i64_extload_1:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lwz 3, 0(3)
+; PWR8-LE-NEXT: li 4, 0
+; PWR8-LE-NEXT: mtfprd 0, 4
+; PWR8-LE-NEXT: mtfprd 1, 3
+; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load i32, ptr %p, align 4
+ %conv = zext i32 %0 to i64
+ %vecinit1 = insertelement <2 x i64> <i64 0, i64 0>, i64 %conv, i64 1
+ ret <2 x i64> %vecinit1
+}
+
+define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2f64_extload_0:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lfs 0, 0(3)
+; PWR7-BE-NEXT: xxlxor 1, 1, 1
+; PWR7-BE-NEXT: xxmrghd 34, 0, 1
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v2f64_extload_0:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lfs 0, 0(3)
+; PWR8-BE-NEXT: xxlxor 1, 1, 1
+; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v2f64_extload_0:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: lfs 0, 0(3)
+; PWR7-LE-NEXT: xxlxor 1, 1, 1
+; PWR7-LE-NEXT: xxmrghd 34, 1, 0
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v2f64_extload_0:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lfs 0, 0(3)
+; PWR8-LE-NEXT: xxlxor 1, 1, 1
+; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: blr
entry:
%0 = load float, ptr %p, align 4
%conv = fpext float %0 to double
- %vecinit1 = insertelement <2 x double> <double poison, double 0.000000e+00>, double %conv, i64 0
+ %vecinit1 = insertelement <2 x double> <double 0.000000e+00, double 0.000000e+00>, double %conv, i64 0
+ ret <2 x double> %vecinit1
+}
+
+define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2f64_extload_1:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lfs 0, 0(3)
+; PWR7-BE-NEXT: xxlxor 1, 1, 1
+; PWR7-BE-NEXT: xxmrghd 34, 1, 0
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v2f64_extload_1:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lfs 0, 0(3)
+; PWR8-BE-NEXT: xxlxor 1, 1, 1
+; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v2f64_extload_1:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: lfs 0, 0(3)
+; PWR7-LE-NEXT: xxlxor 1, 1, 1
+; PWR7-LE-NEXT: xxmrghd 34, 0, 1
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v2f64_extload_1:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lfs 0, 0(3)
+; PWR8-LE-NEXT: xxlxor 1, 1, 1
+; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load float, ptr %p, align 4
+ %conv = fpext float %0 to double
+ %vecinit1 = insertelement <2 x double> <double 0.000000e+00, double 0.000000e+00>, double %conv, i64 1
+ ret <2 x double> %vecinit1
+}
+
+define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2f64_load_0:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lfd 0, 0(3)
+; PWR7-BE-NEXT: xxlxor 1, 1, 1
+; PWR7-BE-NEXT: xxmrghd 34, 0, 1
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v2f64_load_0:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lfd 0, 0(3)
+; PWR8-BE-NEXT: xxlxor 1, 1, 1
+; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v2f64_load_0:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: lfd 0, 0(3)
+; PWR7-LE-NEXT: xxlxor 1, 1, 1
+; PWR7-LE-NEXT: xxmrghd 34, 1, 0
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v2f64_load_0:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lfd 0, 0(3)
+; PWR8-LE-NEXT: xxlxor 1, 1, 1
+; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load double, ptr %p, align 8
+ %vecinit1 = insertelement <2 x double> <double 0.000000e+00, double 0.000000e+00>, double %0, i64 0
+ ret <2 x double> %vecinit1
+}
+
+define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2f64_load_1:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lfd 0, 0(3)
+; PWR7-BE-NEXT: xxlxor 1, 1, 1
+; PWR7-BE-NEXT: xxmrghd 34, 1, 0
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v2f64_load_1:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lfd 0, 0(3)
+; PWR8-BE-NEXT: xxlxor 1, 1, 1
+; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v2f64_load_1:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: lfd 0, 0(3)
+; PWR7-LE-NEXT: xxlxor 1, 1, 1
+; PWR7-LE-NEXT: xxmrghd 34, 0, 1
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v2f64_load_1:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lfd 0, 0(3)
+; PWR8-LE-NEXT: xxlxor 1, 1, 1
+; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load double, ptr %p, align 8
+ %vecinit1 = insertelement <2 x double> <double 0.000000e+00, double 0.000000e+00>, double %0, i64 1
ret <2 x double> %vecinit1
}
+
+define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2i64_load_0:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: ld 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: std 4, -8(1)
+; PWR7-BE-NEXT: std 3, -16(1)
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvd2x 34, 0, 3
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v2i64_load_0:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: ld 3, 0(3)
+; PWR8-BE-NEXT: li 4, 0
+; PWR8-BE-NEXT: mtfprd 0, 4
+; PWR8-BE-NEXT: mtfprd 1, 3
+; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v2i64_load_0:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: ld 3, 0(3)
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: std 4, -8(1)
+; PWR7-LE-NEXT: std 3, -16(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v2i64_load_0:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: ld 3, 0(3)
+; PWR8-LE-NEXT: li 4, 0
+; PWR8-LE-NEXT: mtfprd 0, 4
+; PWR8-LE-NEXT: mtfprd 1, 3
+; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load i64, ptr %p, align 8
+ %vecinit1 = insertelement <2 x i64> <i64 0, i64 0>, i64 %0, i64 0
+ ret <2 x i64> %vecinit1
+}
+
+define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2i64_load_1:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: ld 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: std 4, -16(1)
+; PWR7-BE-NEXT: std 3, -8(1)
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvd2x 34, 0, 3
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v2i64_load_1:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: ld 3, 0(3)
+; PWR8-BE-NEXT: li 4, 0
+; PWR8-BE-NEXT: mtfprd 0, 4
+; PWR8-BE-NEXT: mtfprd 1, 3
+; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v2i64_load_1:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: ld 3, 0(3)
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: std 4, -16(1)
+; PWR7-LE-NEXT: std 3, -8(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v2i64_load_1:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: ld 3, 0(3)
+; PWR8-LE-NEXT: li 4, 0
+; PWR8-LE-NEXT: mtfprd 0, 4
+; PWR8-LE-NEXT: mtfprd 1, 3
+; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load i64, ptr %p, align 8
+ %vecinit1 = insertelement <2 x i64> <i64 0, i64 0>, i64 %0, i64 1
+ ret <2 x i64> %vecinit1
+}
+
+define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4i32_load_0:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: stw 4, -16(1)
+; PWR7-BE-NEXT: stw 3, -32(1)
+; PWR7-BE-NEXT: addis 3, 2, .LCPI8_0@toc@ha
+; PWR7-BE-NEXT: addi 3, 3, .LCPI8_0@toc@l
+; PWR7-BE-NEXT: lxvw4x 34, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvw4x 35, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -32
+; PWR7-BE-NEXT: lxvw4x 36, 0, 3
+; PWR7-BE-NEXT: vperm 2, 4, 3, 2
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v4i32_load_0:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lwz 3, 0(3)
+; PWR8-BE-NEXT: li 4, 0
+; PWR8-BE-NEXT: li 5, 0
+; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-BE-NEXT: rldimi 5, 3, 32, 0
+; PWR8-BE-NEXT: mtfprd 1, 4
+; PWR8-BE-NEXT: mtfprd 0, 5
+; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v4i32_load_0:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -16(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI8_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI8_0@toc@l
+; PWR7-LE-NEXT: stw 3, -32(1)
+; PWR7-LE-NEXT: addi 3, 1, -32
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -16
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v4i32_load_0:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lwz 3, 0(3)
+; PWR8-LE-NEXT: li 4, 0
+; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
+; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT: mtfprd 0, 3
+; PWR8-LE-NEXT: mtfprd 1, 4
+; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load i32, ptr %p, align 4
+ %vecinit1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %0, i32 0
+ ret <4 x i32> %vecinit1
+}
+
+define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4i32_load_1:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: stw 4, -32(1)
+; PWR7-BE-NEXT: stw 3, -16(1)
+; PWR7-BE-NEXT: addis 3, 2, .LCPI9_0@toc@ha
+; PWR7-BE-NEXT: addi 3, 3, .LCPI9_0@toc@l
+; PWR7-BE-NEXT: lxvw4x 34, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -32
+; PWR7-BE-NEXT: lxvw4x 35, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvw4x 36, 0, 3
+; PWR7-BE-NEXT: vperm 2, 3, 4, 2
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v4i32_load_1:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lwz 3, 0(3)
+; PWR8-BE-NEXT: li 4, 0
+; PWR8-BE-NEXT: rldimi 3, 4, 32, 0
+; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-BE-NEXT: mtfprd 0, 3
+; PWR8-BE-NEXT: mtfprd 1, 4
+; PWR8-BE-NEXT: xxmrghd 34, 0, 1
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v4i32_load_1:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI9_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI9_0@toc@l
+; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v4i32_load_1:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lwz 3, 0(3)
+; PWR8-LE-NEXT: li 4, 0
+; PWR8-LE-NEXT: li 5, 0
+; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT: rldimi 5, 3, 32, 0
+; PWR8-LE-NEXT: mtfprd 1, 4
+; PWR8-LE-NEXT: mtfprd 0, 5
+; PWR8-LE-NEXT: xxmrghd 34, 1, 0
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load i32, ptr %p, align 4
+ %vecinit1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %0, i32 1
+ ret <4 x i32> %vecinit1
+}
+
+define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4i32_load_2:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: stw 4, -32(1)
+; PWR7-BE-NEXT: stw 3, -16(1)
+; PWR7-BE-NEXT: addis 3, 2, .LCPI10_0@toc@ha
+; PWR7-BE-NEXT: addi 3, 3, .LCPI10_0@toc@l
+; PWR7-BE-NEXT: lxvw4x 34, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -32
+; PWR7-BE-NEXT: lxvw4x 35, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvw4x 36, 0, 3
+; PWR7-BE-NEXT: vperm 2, 3, 4, 2
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v4i32_load_2:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lwz 3, 0(3)
+; PWR8-BE-NEXT: li 4, 0
+; PWR8-BE-NEXT: li 5, 0
+; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-BE-NEXT: rldimi 5, 3, 32, 0
+; PWR8-BE-NEXT: mtfprd 1, 4
+; PWR8-BE-NEXT: mtfprd 0, 5
+; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v4i32_load_2:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI10_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI10_0@toc@l
+; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v4i32_load_2:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lwz 3, 0(3)
+; PWR8-LE-NEXT: li 4, 0
+; PWR8-LE-NEXT: rldimi 3, 4, 32, 0
+; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT: mtfprd 0, 3
+; PWR8-LE-NEXT: mtfprd 1, 4
+; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load i32, ptr %p, align 4
+ %vecinit1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %0, i32 2
+ ret <4 x i32> %vecinit1
+}
+
+define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4i32_load_3:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: stw 4, -32(1)
+; PWR7-BE-NEXT: stw 3, -16(1)
+; PWR7-BE-NEXT: addis 3, 2, .LCPI11_0@toc@ha
+; PWR7-BE-NEXT: addi 3, 3, .LCPI11_0@toc@l
+; PWR7-BE-NEXT: lxvw4x 34, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -32
+; PWR7-BE-NEXT: lxvw4x 35, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvw4x 36, 0, 3
+; PWR7-BE-NEXT: vperm 2, 3, 4, 2
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v4i32_load_3:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lwz 3, 0(3)
+; PWR8-BE-NEXT: li 4, 0
+; PWR8-BE-NEXT: rldimi 3, 4, 32, 0
+; PWR8-BE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-BE-NEXT: mtfprd 0, 3
+; PWR8-BE-NEXT: mtfprd 1, 4
+; PWR8-BE-NEXT: xxmrghd 34, 1, 0
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v4i32_load_3:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI11_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI11_0@toc@l
+; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v4i32_load_3:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lwz 3, 0(3)
+; PWR8-LE-NEXT: li 4, 0
+; PWR8-LE-NEXT: li 5, 0
+; PWR8-LE-NEXT: rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT: rldimi 5, 3, 32, 0
+; PWR8-LE-NEXT: mtfprd 1, 4
+; PWR8-LE-NEXT: mtfprd 0, 5
+; PWR8-LE-NEXT: xxmrghd 34, 0, 1
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load i32, ptr %p, align 4
+ %vecinit1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %0, i32 3
+ ret <4 x i32> %vecinit1
+}
+
+define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4f32_load_0:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: stw 4, -16(1)
+; PWR7-BE-NEXT: stw 3, -32(1)
+; PWR7-BE-NEXT: addis 3, 2, .LCPI12_0@toc@ha
+; PWR7-BE-NEXT: addi 3, 3, .LCPI12_0@toc@l
+; PWR7-BE-NEXT: lxvw4x 34, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvw4x 35, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -32
+; PWR7-BE-NEXT: lxvw4x 36, 0, 3
+; PWR7-BE-NEXT: vperm 2, 4, 3, 2
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v4f32_load_0:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lfs 0, 0(3)
+; PWR8-BE-NEXT: xxlxor 1, 1, 1
+; PWR8-BE-NEXT: xxmrghd 0, 0, 1
+; PWR8-BE-NEXT: xxspltd 1, 1, 0
+; PWR8-BE-NEXT: xvcvdpsp 34, 0
+; PWR8-BE-NEXT: xvcvdpsp 35, 1
+; PWR8-BE-NEXT: vmrgew 2, 2, 3
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v4f32_load_0:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -16(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI12_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI12_0@toc@l
+; PWR7-LE-NEXT: stw 3, -32(1)
+; PWR7-LE-NEXT: addi 3, 1, -32
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -16
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 3, 4, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v4f32_load_0:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lfs 0, 0(3)
+; PWR8-LE-NEXT: xxlxor 1, 1, 1
+; PWR8-LE-NEXT: xxmrghd 0, 1, 0
+; PWR8-LE-NEXT: xxspltd 1, 1, 0
+; PWR8-LE-NEXT: xvcvdpsp 34, 0
+; PWR8-LE-NEXT: xvcvdpsp 35, 1
+; PWR8-LE-NEXT: vmrgew 2, 3, 2
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load float, ptr %p, align 4
+ %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i32 0
+ ret <4 x float> %vecinit1
+}
+
+define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4f32_load_1:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: stw 4, -32(1)
+; PWR7-BE-NEXT: stw 3, -16(1)
+; PWR7-BE-NEXT: addis 3, 2, .LCPI13_0@toc@ha
+; PWR7-BE-NEXT: addi 3, 3, .LCPI13_0@toc@l
+; PWR7-BE-NEXT: lxvw4x 34, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -32
+; PWR7-BE-NEXT: lxvw4x 35, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvw4x 36, 0, 3
+; PWR7-BE-NEXT: vperm 2, 3, 4, 2
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v4f32_load_1:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lfs 0, 0(3)
+; PWR8-BE-NEXT: xxlxor 1, 1, 1
+; PWR8-BE-NEXT: xxmrghd 0, 0, 1
+; PWR8-BE-NEXT: xxspltd 1, 1, 0
+; PWR8-BE-NEXT: xvcvdpsp 34, 0
+; PWR8-BE-NEXT: xvcvdpsp 35, 1
+; PWR8-BE-NEXT: vmrgew 2, 3, 2
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v4f32_load_1:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI13_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI13_0@toc@l
+; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v4f32_load_1:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lfs 0, 0(3)
+; PWR8-LE-NEXT: xxlxor 1, 1, 1
+; PWR8-LE-NEXT: xxmrghd 0, 1, 0
+; PWR8-LE-NEXT: xxspltd 1, 1, 0
+; PWR8-LE-NEXT: xvcvdpsp 34, 0
+; PWR8-LE-NEXT: xvcvdpsp 35, 1
+; PWR8-LE-NEXT: vmrgew 2, 2, 3
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load float, ptr %p, align 4
+ %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i32 1
+ ret <4 x float> %vecinit1
+}
+
+define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4f32_load_2:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: stw 4, -32(1)
+; PWR7-BE-NEXT: stw 3, -16(1)
+; PWR7-BE-NEXT: addis 3, 2, .LCPI14_0@toc@ha
+; PWR7-BE-NEXT: addi 3, 3, .LCPI14_0@toc@l
+; PWR7-BE-NEXT: lxvw4x 34, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -32
+; PWR7-BE-NEXT: lxvw4x 35, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvw4x 36, 0, 3
+; PWR7-BE-NEXT: vperm 2, 3, 4, 2
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v4f32_load_2:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lfs 0, 0(3)
+; PWR8-BE-NEXT: xxlxor 1, 1, 1
+; PWR8-BE-NEXT: xxmrghd 0, 1, 0
+; PWR8-BE-NEXT: xxspltd 1, 1, 0
+; PWR8-BE-NEXT: xvcvdpsp 34, 0
+; PWR8-BE-NEXT: xvcvdpsp 35, 1
+; PWR8-BE-NEXT: vmrgew 2, 2, 3
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v4f32_load_2:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI14_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI14_0@toc@l
+; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v4f32_load_2:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lfs 0, 0(3)
+; PWR8-LE-NEXT: xxlxor 1, 1, 1
+; PWR8-LE-NEXT: xxmrghd 0, 0, 1
+; PWR8-LE-NEXT: xxspltd 1, 1, 0
+; PWR8-LE-NEXT: xvcvdpsp 34, 0
+; PWR8-LE-NEXT: xvcvdpsp 35, 1
+; PWR8-LE-NEXT: vmrgew 2, 3, 2
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load float, ptr %p, align 4
+ %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i32 2
+ ret <4 x float> %vecinit1
+}
+
+define <4 x float> @build_v4f32_load_3(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4f32_load_3:
+; PWR7-BE: # %bb.0: # %entry
+; PWR7-BE-NEXT: lwz 3, 0(3)
+; PWR7-BE-NEXT: li 4, 0
+; PWR7-BE-NEXT: stw 4, -32(1)
+; PWR7-BE-NEXT: stw 3, -16(1)
+; PWR7-BE-NEXT: addis 3, 2, .LCPI15_0@toc@ha
+; PWR7-BE-NEXT: addi 3, 3, .LCPI15_0@toc@l
+; PWR7-BE-NEXT: lxvw4x 34, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -32
+; PWR7-BE-NEXT: lxvw4x 35, 0, 3
+; PWR7-BE-NEXT: addi 3, 1, -16
+; PWR7-BE-NEXT: lxvw4x 36, 0, 3
+; PWR7-BE-NEXT: vperm 2, 3, 4, 2
+; PWR7-BE-NEXT: blr
+;
+; PWR8-BE-LABEL: build_v4f32_load_3:
+; PWR8-BE: # %bb.0: # %entry
+; PWR8-BE-NEXT: lfs 0, 0(3)
+; PWR8-BE-NEXT: xxlxor 1, 1, 1
+; PWR8-BE-NEXT: xxmrghd 0, 1, 0
+; PWR8-BE-NEXT: xxspltd 1, 1, 0
+; PWR8-BE-NEXT: xvcvdpsp 34, 0
+; PWR8-BE-NEXT: xvcvdpsp 35, 1
+; PWR8-BE-NEXT: vmrgew 2, 3, 2
+; PWR8-BE-NEXT: blr
+;
+; PWR7-LE-LABEL: build_v4f32_load_3:
+; PWR7-LE: # %bb.0: # %entry
+; PWR7-LE-NEXT: li 4, 0
+; PWR7-LE-NEXT: lwz 3, 0(3)
+; PWR7-LE-NEXT: stw 4, -32(1)
+; PWR7-LE-NEXT: addis 4, 2, .LCPI15_0@toc@ha
+; PWR7-LE-NEXT: addi 4, 4, .LCPI15_0@toc@l
+; PWR7-LE-NEXT: stw 3, -16(1)
+; PWR7-LE-NEXT: addi 3, 1, -16
+; PWR7-LE-NEXT: lxvd2x 0, 0, 4
+; PWR7-LE-NEXT: addi 4, 1, -32
+; PWR7-LE-NEXT: lxvd2x 1, 0, 4
+; PWR7-LE-NEXT: xxswapd 34, 0
+; PWR7-LE-NEXT: lxvd2x 0, 0, 3
+; PWR7-LE-NEXT: xxswapd 35, 1
+; PWR7-LE-NEXT: xxswapd 36, 0
+; PWR7-LE-NEXT: vperm 2, 4, 3, 2
+; PWR7-LE-NEXT: blr
+;
+; PWR8-LE-LABEL: build_v4f32_load_3:
+; PWR8-LE: # %bb.0: # %entry
+; PWR8-LE-NEXT: lfs 0, 0(3)
+; PWR8-LE-NEXT: xxlxor 1, 1, 1
+; PWR8-LE-NEXT: xxmrghd 0, 0, 1
+; PWR8-LE-NEXT: xxspltd 1, 1, 0
+; PWR8-LE-NEXT: xvcvdpsp 34, 0
+; PWR8-LE-NEXT: xvcvdpsp 35, 1
+; PWR8-LE-NEXT: vmrgew 2, 2, 3
+; PWR8-LE-NEXT: blr
+entry:
+ %0 = load float, ptr %p, align 4
+ %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i32 3
+ ret <4 x float> %vecinit1
+}
More information about the llvm-commits
mailing list