[llvm] afd9582 - [PowerPC] Enhance test for PR #73609. NFC.

Kai Luo via llvm-commits llvm-commits at lists.llvm.org
Wed Nov 29 21:09:56 PST 2023


Author: Kai Luo
Date: 2023-11-30T05:06:29Z
New Revision: afd9582b36ba87b48730f95ab42b79d07c29235e

URL: https://github.com/llvm/llvm-project/commit/afd9582b36ba87b48730f95ab42b79d07c29235e
DIFF: https://github.com/llvm/llvm-project/commit/afd9582b36ba87b48730f95ab42b79d07c29235e.diff

LOG: [PowerPC] Enhance test for PR #73609. NFC.

Added: 
    

Modified: 
    llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll

Removed: 
    


################################################################################
diff  --git a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
index 16c2617b3564931..cc32a76b22c2872 100644
--- a/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
+++ b/llvm/test/CodeGen/PowerPC/build-vector-from-load-and-zeros.ll
@@ -1,50 +1,843 @@
 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck %s
-; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck --check-prefix=PWR8 %s
-
-define  <2 x i64> @build_v2i64(ptr nocapture noundef readonly %p, <2 x i64> noundef %a) {
-; CHECK-LABEL: build_v2i64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lwz 3, 0(3)
-; CHECK-NEXT:    li 4, 0
-; CHECK-NEXT:    std 4, -8(1)
-; CHECK-NEXT:    std 3, -16(1)
-; CHECK-NEXT:    addi 3, 1, -16
-; CHECK-NEXT:    lxvd2x 34, 0, 3
-; CHECK-NEXT:    blr
-;
-; PWR8-LABEL: build_v2i64:
-; PWR8:       # %bb.0: # %entry
-; PWR8-NEXT:    lwz 3, 0(3)
-; PWR8-NEXT:    li 4, 0
-; PWR8-NEXT:    mtfprd 0, 4
-; PWR8-NEXT:    mtfprd 1, 3
-; PWR8-NEXT:    xxmrghd 34, 1, 0
-; PWR8-NEXT:    blr
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr7 < %s | FileCheck --check-prefix=PWR7-BE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-- -mcpu=pwr8 < %s | FileCheck --check-prefix=PWR8-BE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr7 < %s | FileCheck --check-prefix=PWR7-LE %s
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-- -mcpu=pwr8 < %s | FileCheck --check-prefix=PWR8-LE %s
+
+define  <2 x i64> @build_v2i64_extload_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2i64_extload_0:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    std 4, -8(1)
+; PWR7-BE-NEXT:    std 3, -16(1)
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvd2x 34, 0, 3
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v2i64_extload_0:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lwz 3, 0(3)
+; PWR8-BE-NEXT:    li 4, 0
+; PWR8-BE-NEXT:    mtfprd 0, 4
+; PWR8-BE-NEXT:    mtfprd 1, 3
+; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v2i64_extload_0:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI0_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI0_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -32(1)
+; PWR7-LE-NEXT:    addi 3, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v2i64_extload_0:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lwz 3, 0(3)
+; PWR8-LE-NEXT:    li 4, 0
+; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
+; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT:    mtfprd 0, 3
+; PWR8-LE-NEXT:    mtfprd 1, 4
+; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    blr
 entry:
   %0 = load i32, ptr %p, align 4
   %conv = zext i32 %0 to i64
-  %vecinit1 = insertelement <2 x i64> <i64 poison, i64 0>, i64 %conv, i64 0
+  %vecinit1 = insertelement <2 x i64> <i64 0, i64 0>, i64 %conv, i64 0
   ret <2 x i64> %vecinit1
 }
 
-define <2 x double> @build_v2f64(ptr nocapture noundef readonly %p, <2 x double> noundef %a) {
-; CHECK-LABEL: build_v2f64:
-; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    lfs 0, 0(3)
-; CHECK-NEXT:    xxlxor 1, 1, 1
-; CHECK-NEXT:    xxmrghd 34, 0, 1
-; CHECK-NEXT:    blr
+define  <2 x i64> @build_v2i64_extload_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2i64_extload_1:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    std 4, -16(1)
+; PWR7-BE-NEXT:    std 3, -8(1)
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvd2x 34, 0, 3
+; PWR7-BE-NEXT:    blr
 ;
-; PWR8-LABEL: build_v2f64:
-; PWR8:       # %bb.0: # %entry
-; PWR8-NEXT:    lfs 0, 0(3)
-; PWR8-NEXT:    xxlxor 1, 1, 1
-; PWR8-NEXT:    xxmrghd 34, 0, 1
-; PWR8-NEXT:    blr
+; PWR8-BE-LABEL: build_v2i64_extload_1:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lwz 3, 0(3)
+; PWR8-BE-NEXT:    li 4, 0
+; PWR8-BE-NEXT:    mtfprd 0, 4
+; PWR8-BE-NEXT:    mtfprd 1, 3
+; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v2i64_extload_1:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    std 4, -16(1)
+; PWR7-LE-NEXT:    std 3, -8(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v2i64_extload_1:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lwz 3, 0(3)
+; PWR8-LE-NEXT:    li 4, 0
+; PWR8-LE-NEXT:    mtfprd 0, 4
+; PWR8-LE-NEXT:    mtfprd 1, 3
+; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load i32, ptr %p, align 4
+  %conv = zext i32 %0 to i64
+  %vecinit1 = insertelement <2 x i64> <i64 0, i64 0>, i64 %conv, i64 1
+  ret <2 x i64> %vecinit1
+}
+
+define <2 x double> @build_v2f64_extload_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2f64_extload_0:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lfs 0, 0(3)
+; PWR7-BE-NEXT:    xxlxor 1, 1, 1
+; PWR7-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v2f64_extload_0:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lfs 0, 0(3)
+; PWR8-BE-NEXT:    xxlxor 1, 1, 1
+; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v2f64_extload_0:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    lfs 0, 0(3)
+; PWR7-LE-NEXT:    xxlxor 1, 1, 1
+; PWR7-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v2f64_extload_0:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lfs 0, 0(3)
+; PWR8-LE-NEXT:    xxlxor 1, 1, 1
+; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    blr
 entry:
   %0 = load float, ptr %p, align 4
   %conv = fpext float %0 to double
-  %vecinit1 = insertelement <2 x double> <double poison, double 0.000000e+00>, double %conv, i64 0
+  %vecinit1 = insertelement <2 x double> <double 0.000000e+00, double 0.000000e+00>, double %conv, i64 0
+  ret <2 x double> %vecinit1
+}
+
+define <2 x double> @build_v2f64_extload_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2f64_extload_1:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lfs 0, 0(3)
+; PWR7-BE-NEXT:    xxlxor 1, 1, 1
+; PWR7-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v2f64_extload_1:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lfs 0, 0(3)
+; PWR8-BE-NEXT:    xxlxor 1, 1, 1
+; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v2f64_extload_1:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    lfs 0, 0(3)
+; PWR7-LE-NEXT:    xxlxor 1, 1, 1
+; PWR7-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v2f64_extload_1:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lfs 0, 0(3)
+; PWR8-LE-NEXT:    xxlxor 1, 1, 1
+; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load float, ptr %p, align 4
+  %conv = fpext float %0 to double
+  %vecinit1 = insertelement <2 x double> <double 0.000000e+00, double 0.000000e+00>, double %conv, i64 1
+  ret <2 x double> %vecinit1
+}
+
+define <2 x double> @build_v2f64_load_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2f64_load_0:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lfd 0, 0(3)
+; PWR7-BE-NEXT:    xxlxor 1, 1, 1
+; PWR7-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v2f64_load_0:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lfd 0, 0(3)
+; PWR8-BE-NEXT:    xxlxor 1, 1, 1
+; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v2f64_load_0:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    lfd 0, 0(3)
+; PWR7-LE-NEXT:    xxlxor 1, 1, 1
+; PWR7-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v2f64_load_0:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lfd 0, 0(3)
+; PWR8-LE-NEXT:    xxlxor 1, 1, 1
+; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load double, ptr %p, align 8
+  %vecinit1 = insertelement <2 x double> <double 0.000000e+00, double 0.000000e+00>, double %0, i64 0
+  ret <2 x double> %vecinit1
+}
+
+define <2 x double> @build_v2f64_load_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2f64_load_1:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lfd 0, 0(3)
+; PWR7-BE-NEXT:    xxlxor 1, 1, 1
+; PWR7-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v2f64_load_1:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lfd 0, 0(3)
+; PWR8-BE-NEXT:    xxlxor 1, 1, 1
+; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v2f64_load_1:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    lfd 0, 0(3)
+; PWR7-LE-NEXT:    xxlxor 1, 1, 1
+; PWR7-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v2f64_load_1:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lfd 0, 0(3)
+; PWR8-LE-NEXT:    xxlxor 1, 1, 1
+; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load double, ptr %p, align 8
+  %vecinit1 = insertelement <2 x double> <double 0.000000e+00, double 0.000000e+00>, double %0, i64 1
   ret <2 x double> %vecinit1
 }
+
+define <2 x i64> @build_v2i64_load_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2i64_load_0:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    ld 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    std 4, -8(1)
+; PWR7-BE-NEXT:    std 3, -16(1)
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvd2x 34, 0, 3
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v2i64_load_0:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    ld 3, 0(3)
+; PWR8-BE-NEXT:    li 4, 0
+; PWR8-BE-NEXT:    mtfprd 0, 4
+; PWR8-BE-NEXT:    mtfprd 1, 3
+; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v2i64_load_0:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    ld 3, 0(3)
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    std 4, -8(1)
+; PWR7-LE-NEXT:    std 3, -16(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v2i64_load_0:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    ld 3, 0(3)
+; PWR8-LE-NEXT:    li 4, 0
+; PWR8-LE-NEXT:    mtfprd 0, 4
+; PWR8-LE-NEXT:    mtfprd 1, 3
+; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load i64, ptr %p, align 8
+  %vecinit1 = insertelement <2 x i64> <i64 0, i64 0>, i64 %0, i64 0
+  ret <2 x i64> %vecinit1
+}
+
+define <2 x i64> @build_v2i64_load_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v2i64_load_1:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    ld 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    std 4, -16(1)
+; PWR7-BE-NEXT:    std 3, -8(1)
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvd2x 34, 0, 3
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v2i64_load_1:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    ld 3, 0(3)
+; PWR8-BE-NEXT:    li 4, 0
+; PWR8-BE-NEXT:    mtfprd 0, 4
+; PWR8-BE-NEXT:    mtfprd 1, 3
+; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v2i64_load_1:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    ld 3, 0(3)
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    std 4, -16(1)
+; PWR7-LE-NEXT:    std 3, -8(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v2i64_load_1:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    ld 3, 0(3)
+; PWR8-LE-NEXT:    li 4, 0
+; PWR8-LE-NEXT:    mtfprd 0, 4
+; PWR8-LE-NEXT:    mtfprd 1, 3
+; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load i64, ptr %p, align 8
+  %vecinit1 = insertelement <2 x i64> <i64 0, i64 0>, i64 %0, i64 1
+  ret <2 x i64> %vecinit1
+}
+
+define <4 x i32> @build_v4i32_load_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4i32_load_0:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    stw 4, -16(1)
+; PWR7-BE-NEXT:    stw 3, -32(1)
+; PWR7-BE-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
+; PWR7-BE-NEXT:    addi 3, 3, .LCPI8_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -32
+; PWR7-BE-NEXT:    lxvw4x 36, 0, 3
+; PWR7-BE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v4i32_load_0:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lwz 3, 0(3)
+; PWR8-BE-NEXT:    li 4, 0
+; PWR8-BE-NEXT:    li 5, 0
+; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-BE-NEXT:    rldimi 5, 3, 32, 0
+; PWR8-BE-NEXT:    mtfprd 1, 4
+; PWR8-BE-NEXT:    mtfprd 0, 5
+; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v4i32_load_0:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI8_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI8_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -32(1)
+; PWR7-LE-NEXT:    addi 3, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v4i32_load_0:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lwz 3, 0(3)
+; PWR8-LE-NEXT:    li 4, 0
+; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
+; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT:    mtfprd 0, 3
+; PWR8-LE-NEXT:    mtfprd 1, 4
+; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load i32, ptr %p, align 4
+  %vecinit1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %0, i32 0
+  ret <4 x i32> %vecinit1
+}
+
+define <4 x i32> @build_v4i32_load_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4i32_load_1:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    stw 4, -32(1)
+; PWR7-BE-NEXT:    stw 3, -16(1)
+; PWR7-BE-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
+; PWR7-BE-NEXT:    addi 3, 3, .LCPI9_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -32
+; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvw4x 36, 0, 3
+; PWR7-BE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v4i32_load_1:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lwz 3, 0(3)
+; PWR8-BE-NEXT:    li 4, 0
+; PWR8-BE-NEXT:    rldimi 3, 4, 32, 0
+; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-BE-NEXT:    mtfprd 0, 3
+; PWR8-BE-NEXT:    mtfprd 1, 4
+; PWR8-BE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v4i32_load_1:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -32(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI9_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI9_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v4i32_load_1:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lwz 3, 0(3)
+; PWR8-LE-NEXT:    li 4, 0
+; PWR8-LE-NEXT:    li 5, 0
+; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT:    rldimi 5, 3, 32, 0
+; PWR8-LE-NEXT:    mtfprd 1, 4
+; PWR8-LE-NEXT:    mtfprd 0, 5
+; PWR8-LE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load i32, ptr %p, align 4
+  %vecinit1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %0, i32 1
+  ret <4 x i32> %vecinit1
+}
+
+define <4 x i32> @build_v4i32_load_2(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4i32_load_2:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    stw 4, -32(1)
+; PWR7-BE-NEXT:    stw 3, -16(1)
+; PWR7-BE-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
+; PWR7-BE-NEXT:    addi 3, 3, .LCPI10_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -32
+; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvw4x 36, 0, 3
+; PWR7-BE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v4i32_load_2:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lwz 3, 0(3)
+; PWR8-BE-NEXT:    li 4, 0
+; PWR8-BE-NEXT:    li 5, 0
+; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-BE-NEXT:    rldimi 5, 3, 32, 0
+; PWR8-BE-NEXT:    mtfprd 1, 4
+; PWR8-BE-NEXT:    mtfprd 0, 5
+; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v4i32_load_2:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -32(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI10_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI10_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v4i32_load_2:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lwz 3, 0(3)
+; PWR8-LE-NEXT:    li 4, 0
+; PWR8-LE-NEXT:    rldimi 3, 4, 32, 0
+; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT:    mtfprd 0, 3
+; PWR8-LE-NEXT:    mtfprd 1, 4
+; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load i32, ptr %p, align 4
+  %vecinit1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %0, i32 2
+  ret <4 x i32> %vecinit1
+}
+
+define <4 x i32> @build_v4i32_load_3(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4i32_load_3:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    stw 4, -32(1)
+; PWR7-BE-NEXT:    stw 3, -16(1)
+; PWR7-BE-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
+; PWR7-BE-NEXT:    addi 3, 3, .LCPI11_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -32
+; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvw4x 36, 0, 3
+; PWR7-BE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v4i32_load_3:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lwz 3, 0(3)
+; PWR8-BE-NEXT:    li 4, 0
+; PWR8-BE-NEXT:    rldimi 3, 4, 32, 0
+; PWR8-BE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-BE-NEXT:    mtfprd 0, 3
+; PWR8-BE-NEXT:    mtfprd 1, 4
+; PWR8-BE-NEXT:    xxmrghd 34, 1, 0
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v4i32_load_3:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -32(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI11_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI11_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v4i32_load_3:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lwz 3, 0(3)
+; PWR8-LE-NEXT:    li 4, 0
+; PWR8-LE-NEXT:    li 5, 0
+; PWR8-LE-NEXT:    rldimi 4, 4, 32, 0
+; PWR8-LE-NEXT:    rldimi 5, 3, 32, 0
+; PWR8-LE-NEXT:    mtfprd 1, 4
+; PWR8-LE-NEXT:    mtfprd 0, 5
+; PWR8-LE-NEXT:    xxmrghd 34, 0, 1
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load i32, ptr %p, align 4
+  %vecinit1 = insertelement <4 x i32> <i32 0, i32 0, i32 0, i32 0>, i32 %0, i32 3
+  ret <4 x i32> %vecinit1
+}
+
+define <4 x float> @build_v4f32_load_0(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4f32_load_0:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    stw 4, -16(1)
+; PWR7-BE-NEXT:    stw 3, -32(1)
+; PWR7-BE-NEXT:    addis 3, 2, .LCPI12_0@toc@ha
+; PWR7-BE-NEXT:    addi 3, 3, .LCPI12_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -32
+; PWR7-BE-NEXT:    lxvw4x 36, 0, 3
+; PWR7-BE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v4f32_load_0:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lfs 0, 0(3)
+; PWR8-BE-NEXT:    xxlxor 1, 1, 1
+; PWR8-BE-NEXT:    xxmrghd 0, 0, 1
+; PWR8-BE-NEXT:    xxspltd 1, 1, 0
+; PWR8-BE-NEXT:    xvcvdpsp 34, 0
+; PWR8-BE-NEXT:    xvcvdpsp 35, 1
+; PWR8-BE-NEXT:    vmrgew 2, 2, 3
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v4f32_load_0:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -16(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI12_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI12_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -32(1)
+; PWR7-LE-NEXT:    addi 3, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v4f32_load_0:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lfs 0, 0(3)
+; PWR8-LE-NEXT:    xxlxor 1, 1, 1
+; PWR8-LE-NEXT:    xxmrghd 0, 1, 0
+; PWR8-LE-NEXT:    xxspltd 1, 1, 0
+; PWR8-LE-NEXT:    xvcvdpsp 34, 0
+; PWR8-LE-NEXT:    xvcvdpsp 35, 1
+; PWR8-LE-NEXT:    vmrgew 2, 3, 2
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load float, ptr %p, align 4
+  %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i32 0
+  ret <4 x float> %vecinit1
+}
+
+define <4 x float> @build_v4f32_load_1(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4f32_load_1:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    stw 4, -32(1)
+; PWR7-BE-NEXT:    stw 3, -16(1)
+; PWR7-BE-NEXT:    addis 3, 2, .LCPI13_0@toc@ha
+; PWR7-BE-NEXT:    addi 3, 3, .LCPI13_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -32
+; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvw4x 36, 0, 3
+; PWR7-BE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v4f32_load_1:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lfs 0, 0(3)
+; PWR8-BE-NEXT:    xxlxor 1, 1, 1
+; PWR8-BE-NEXT:    xxmrghd 0, 0, 1
+; PWR8-BE-NEXT:    xxspltd 1, 1, 0
+; PWR8-BE-NEXT:    xvcvdpsp 34, 0
+; PWR8-BE-NEXT:    xvcvdpsp 35, 1
+; PWR8-BE-NEXT:    vmrgew 2, 3, 2
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v4f32_load_1:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -32(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI13_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI13_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v4f32_load_1:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lfs 0, 0(3)
+; PWR8-LE-NEXT:    xxlxor 1, 1, 1
+; PWR8-LE-NEXT:    xxmrghd 0, 1, 0
+; PWR8-LE-NEXT:    xxspltd 1, 1, 0
+; PWR8-LE-NEXT:    xvcvdpsp 34, 0
+; PWR8-LE-NEXT:    xvcvdpsp 35, 1
+; PWR8-LE-NEXT:    vmrgew 2, 2, 3
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load float, ptr %p, align 4
+  %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i32 1
+  ret <4 x float> %vecinit1
+}
+
+define <4 x float> @build_v4f32_load_2(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4f32_load_2:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    stw 4, -32(1)
+; PWR7-BE-NEXT:    stw 3, -16(1)
+; PWR7-BE-NEXT:    addis 3, 2, .LCPI14_0@toc@ha
+; PWR7-BE-NEXT:    addi 3, 3, .LCPI14_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -32
+; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvw4x 36, 0, 3
+; PWR7-BE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v4f32_load_2:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lfs 0, 0(3)
+; PWR8-BE-NEXT:    xxlxor 1, 1, 1
+; PWR8-BE-NEXT:    xxmrghd 0, 1, 0
+; PWR8-BE-NEXT:    xxspltd 1, 1, 0
+; PWR8-BE-NEXT:    xvcvdpsp 34, 0
+; PWR8-BE-NEXT:    xvcvdpsp 35, 1
+; PWR8-BE-NEXT:    vmrgew 2, 2, 3
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v4f32_load_2:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -32(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI14_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI14_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v4f32_load_2:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lfs 0, 0(3)
+; PWR8-LE-NEXT:    xxlxor 1, 1, 1
+; PWR8-LE-NEXT:    xxmrghd 0, 0, 1
+; PWR8-LE-NEXT:    xxspltd 1, 1, 0
+; PWR8-LE-NEXT:    xvcvdpsp 34, 0
+; PWR8-LE-NEXT:    xvcvdpsp 35, 1
+; PWR8-LE-NEXT:    vmrgew 2, 3, 2
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load float, ptr %p, align 4
+  %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i32 2
+  ret <4 x float> %vecinit1
+}
+
+define <4 x float> @build_v4f32_load_3(ptr nocapture noundef readonly %p) {
+; PWR7-BE-LABEL: build_v4f32_load_3:
+; PWR7-BE:       # %bb.0: # %entry
+; PWR7-BE-NEXT:    lwz 3, 0(3)
+; PWR7-BE-NEXT:    li 4, 0
+; PWR7-BE-NEXT:    stw 4, -32(1)
+; PWR7-BE-NEXT:    stw 3, -16(1)
+; PWR7-BE-NEXT:    addis 3, 2, .LCPI15_0@toc@ha
+; PWR7-BE-NEXT:    addi 3, 3, .LCPI15_0@toc@l
+; PWR7-BE-NEXT:    lxvw4x 34, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -32
+; PWR7-BE-NEXT:    lxvw4x 35, 0, 3
+; PWR7-BE-NEXT:    addi 3, 1, -16
+; PWR7-BE-NEXT:    lxvw4x 36, 0, 3
+; PWR7-BE-NEXT:    vperm 2, 3, 4, 2
+; PWR7-BE-NEXT:    blr
+;
+; PWR8-BE-LABEL: build_v4f32_load_3:
+; PWR8-BE:       # %bb.0: # %entry
+; PWR8-BE-NEXT:    lfs 0, 0(3)
+; PWR8-BE-NEXT:    xxlxor 1, 1, 1
+; PWR8-BE-NEXT:    xxmrghd 0, 1, 0
+; PWR8-BE-NEXT:    xxspltd 1, 1, 0
+; PWR8-BE-NEXT:    xvcvdpsp 34, 0
+; PWR8-BE-NEXT:    xvcvdpsp 35, 1
+; PWR8-BE-NEXT:    vmrgew 2, 3, 2
+; PWR8-BE-NEXT:    blr
+;
+; PWR7-LE-LABEL: build_v4f32_load_3:
+; PWR7-LE:       # %bb.0: # %entry
+; PWR7-LE-NEXT:    li 4, 0
+; PWR7-LE-NEXT:    lwz 3, 0(3)
+; PWR7-LE-NEXT:    stw 4, -32(1)
+; PWR7-LE-NEXT:    addis 4, 2, .LCPI15_0@toc@ha
+; PWR7-LE-NEXT:    addi 4, 4, .LCPI15_0@toc@l
+; PWR7-LE-NEXT:    stw 3, -16(1)
+; PWR7-LE-NEXT:    addi 3, 1, -16
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 4
+; PWR7-LE-NEXT:    addi 4, 1, -32
+; PWR7-LE-NEXT:    lxvd2x 1, 0, 4
+; PWR7-LE-NEXT:    xxswapd 34, 0
+; PWR7-LE-NEXT:    lxvd2x 0, 0, 3
+; PWR7-LE-NEXT:    xxswapd 35, 1
+; PWR7-LE-NEXT:    xxswapd 36, 0
+; PWR7-LE-NEXT:    vperm 2, 4, 3, 2
+; PWR7-LE-NEXT:    blr
+;
+; PWR8-LE-LABEL: build_v4f32_load_3:
+; PWR8-LE:       # %bb.0: # %entry
+; PWR8-LE-NEXT:    lfs 0, 0(3)
+; PWR8-LE-NEXT:    xxlxor 1, 1, 1
+; PWR8-LE-NEXT:    xxmrghd 0, 0, 1
+; PWR8-LE-NEXT:    xxspltd 1, 1, 0
+; PWR8-LE-NEXT:    xvcvdpsp 34, 0
+; PWR8-LE-NEXT:    xvcvdpsp 35, 1
+; PWR8-LE-NEXT:    vmrgew 2, 2, 3
+; PWR8-LE-NEXT:    blr
+entry:
+  %0 = load float, ptr %p, align 4
+  %vecinit1 = insertelement <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %0, i32 3
+  ret <4 x float> %vecinit1
+}


        


More information about the llvm-commits mailing list