[llvm] 0c41f77 - [PowerPC] Enable safe for 32bit vins* P10 instructions
Zarko Todorovski via llvm-commits
llvm-commits at lists.llvm.org
Mon May 10 07:13:22 PDT 2021
Author: Zarko Todorovski
Date: 2021-05-10T10:13:13-04:00
New Revision: 0c41f77857fccf2a20d48ca96ce61d3c4d1634e6
URL: https://github.com/llvm/llvm-project/commit/0c41f77857fccf2a20d48ca96ce61d3c4d1634e6
DIFF: https://github.com/llvm/llvm-project/commit/0c41f77857fccf2a20d48ca96ce61d3c4d1634e6.diff
LOG: [PowerPC] Enable safe for 32bit vins* P10 instructions
Correctly emit `vins*` instructions that are safe in 32-bit mode.
Reviewed By: nemanjai, #powerpc
Differential Revision: https://reviews.llvm.org/D101383
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrPrefix.td
llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 0007200237d7..b98cf939c731 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -10444,6 +10444,8 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
return Op;
if (Subtarget.isISA3_1()) {
+ if ((VT == MVT::v2i64 || VT == MVT::v2f64) && !Subtarget.isPPC64())
+ return SDValue();
// On P10, we have legal lowering for constant and variable indices for
// integer vectors.
if (VT == MVT::v16i8 || VT == MVT::v8i16 || VT == MVT::v4i32 ||
diff --git a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
index 7c50d4b2d931..b183dbd4b3bb 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrPrefix.td
@@ -1,3 +1,6 @@
+//-------------------------- Predicate definitions ---------------------------//
+def IsPPC32 : Predicate<"!Subtarget->isPPC64()">;
+
// Mask immediates for MMA instructions (2, 4 and 8 bits).
def Msk2Imm : ImmLeaf<i32, [{ return isUInt<2>(Imm); }]>;
def Msk4Imm : ImmLeaf<i32, [{ return isUInt<4>(Imm); }]>;
@@ -2752,7 +2755,44 @@ let Predicates = [IsISA3_1, HasVSX, IsLittleEndian] in {
(VINSD $vDi, !mul(!sub(1, i), 8), $rA)>;
}
-let Predicates = [IsISA3_1, HasVSX, IsBigEndian] in {
+let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC32] in {
+ // Indexed vector insert element
+ def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i32:$rB)),
+ (VINSBLX $vDi, $rB, $rA)>;
+ def : Pat<(v8i16 (PPCvecinsertelt v8i16:$vDi, i32:$rA, i32:$rB)),
+ (VINSHLX $vDi, $rB, $rA)>;
+ def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, i32:$rB)),
+ (VINSWLX $vDi, $rB, $rA)>;
+
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, f32:$A, i32:$rB)),
+ (VINSWLX $vDi, $rB, Bitcast.FltToInt)>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
+ i32:$rB)),
+ (VINSWLX $vDi, $rB, (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
+ i32:$rB)),
+ (VINSWLX $vDi, $rB, (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
+ i32:$rB)),
+ (VINSWLX $vDi, $rB, (LWZX memrr:$rA))>;
+
+ // Immediate vector insert element
+ foreach i = [0, 1, 2, 3] in {
+ def : Pat<(v4i32 (PPCvecinsertelt v4i32:$vDi, i32:$rA, (i32 i))),
+ (VINSW $vDi, !mul(i, 4), $rA)>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddr:$rA)),
+ (i32 i))),
+ (VINSW $vDi, !mul(i, 4), (LWZ memri:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load iaddrX34:$rA)),
+ (i32 i))),
+ (VINSW $vDi, !mul(i, 4), (PLWZ memri34:$rA))>;
+ def : Pat<(v4f32 (PPCvecinsertelt v4f32:$vDi, (f32 (load xaddr:$rA)),
+ (i32 i))),
+ (VINSW $vDi, !mul(i, 4), (LWZX memrr:$rA))>;
+ }
+}
+
+let Predicates = [IsISA3_1, HasVSX, IsBigEndian, IsPPC64] in {
// Indexed vector insert element
def : Pat<(v16i8 (PPCvecinsertelt v16i8:$vDi, i32:$rA, i64:$rB)),
(VINSBLX $vDi, InsertEltShift.Sub32Left0, $rA)>;
diff --git a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
index 80d2bb11c992..26dd16121fec 100644
--- a/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
+++ b/llvm/test/CodeGen/PowerPC/aix-vec_insert_elt.ll
@@ -1,6 +1,8 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-64
; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr9 < %s | FileCheck %s -check-prefix=CHECK-32
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-64-P10
+; RUN: llc -verify-machineinstrs -mtriple=powerpc-ibm-aix-xcoff -vec-extabi -mcpu=pwr10 < %s | FileCheck %s -check-prefix=CHECK-32-P10
; Byte indexed
@@ -22,6 +24,16 @@ define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
; CHECK-32-NEXT: stbx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testByte:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: vinsblx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testByte:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: vinsblx 2, 6, 4
+; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i8
%vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx
@@ -48,6 +60,17 @@ define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
; CHECK-32-NEXT: sthx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testHalf:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: slwi 4, 4, 1
+; CHECK-64-P10-NEXT: vinshlx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testHalf:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: vinshlx 2, 6, 4
+; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i16
%vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx
@@ -74,6 +97,17 @@ define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
; CHECK-32-NEXT: stwx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testWord:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testWord:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: vinswlx 2, 6, 4
+; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i32
%vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx
@@ -96,6 +130,18 @@ define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) {
; CHECK-32-NEXT: xxinsertw 34, 0, 4
; CHECK-32-NEXT: xxinsertw 34, 0, 12
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testWordImm:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: vinsw 2, 3, 4
+; CHECK-64-P10-NEXT: vinsw 2, 3, 12
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testWordImm:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: vinsw 2, 4, 4
+; CHECK-32-P10-NEXT: vinsw 2, 4, 12
+; CHECK-32-P10-NEXT: blr
entry:
%conv = trunc i64 %b to i32
%vecins = insertelement <4 x i32> %a, i32 %conv, i32 1
@@ -130,6 +176,20 @@ define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
; CHECK-32-NEXT: stwx 4, 5, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDoubleword:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDoubleword:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: add 5, 6, 6
+; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
+; CHECK-32-P10-NEXT: addi 3, 5, 1
+; CHECK-32-P10-NEXT: vinswlx 2, 3, 4
+; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx
ret <2 x i64> %vecins
@@ -151,6 +211,17 @@ define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) {
; CHECK-32-NEXT: mtfprwz 0, 4
; CHECK-32-NEXT: xxinsertw 34, 0, 12
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDoublewordImm:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: vinsd 2, 3, 8
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDoublewordImm:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: vinsw 2, 3, 8
+; CHECK-32-P10-NEXT: vinsw 2, 4, 12
+; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x i64> %a, i64 %b, i32 1
ret <2 x i64> %vecins
@@ -170,6 +241,17 @@ define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) {
; CHECK-32-NEXT: mtfprwz 0, 4
; CHECK-32-NEXT: xxinsertw 34, 0, 4
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDoublewordImm2:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: vinsd 2, 3, 0
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDoublewordImm2:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: vinsw 2, 3, 0
+; CHECK-32-P10-NEXT: vinsw 2, 4, 4
+; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x i64> %a, i64 %b, i32 0
ret <2 x i64> %vecins
@@ -195,6 +277,24 @@ define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
; CHECK-32-NEXT: stfsx 1, 4, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testFloat1:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: xscvdpspn 0, 1
+; CHECK-64-P10-NEXT: extsw 3, 4
+; CHECK-64-P10-NEXT: slwi 3, 3, 2
+; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3
+; CHECK-64-P10-NEXT: mffprwz 4, 0
+; CHECK-64-P10-NEXT: vinswlx 2, 3, 4
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testFloat1:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: xscvdpspn 0, 1
+; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3
+; CHECK-32-P10-NEXT: mffprwz 3, 0
+; CHECK-32-P10-NEXT: vinswlx 2, 4, 3
+; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <4 x float> %a, float %b, i32 %idx1
ret <4 x float> %vecins
@@ -203,18 +303,18 @@ entry:
define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testFloat2:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: lwz 6, 0(3)
-; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG: addi 7, 1, -32
-; CHECK-64-DAG: stxv 34, -32(1)
-; CHECK-64-DAG: stwx 6, 7, 4
-; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29
-; CHECK-64-DAG: addi 5, 1, -16
-; CHECK-64-DAG: lxv 0, -32(1)
-; CHECK-64-DAG: lwz 3, 1(3)
-; CHECK-64-DAG: stxv 0, -16(1)
-; CHECK-64-DAG: stwx 3, 5, 4
-; CHECK-64-DAG: lxv 34, -16(1)
+; CHECK-64-NEXT: lwz 6, 0(3)
+; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: stwx 6, 7, 4
+; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
+; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: lxv 0, -32(1)
+; CHECK-64-NEXT: lwz 3, 1(3)
+; CHECK-64-NEXT: stxv 0, -16(1)
+; CHECK-64-NEXT: stwx 3, 5, 4
+; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testFloat2:
@@ -232,6 +332,26 @@ define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
; CHECK-32-NEXT: stwx 3, 4, 5
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testFloat2:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: lwz 6, 0(3)
+; CHECK-64-P10-NEXT: extsw 4, 4
+; CHECK-64-P10-NEXT: lwz 3, 1(3)
+; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
+; CHECK-64-P10-NEXT: extsw 4, 5
+; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testFloat2:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lwz 6, 0(3)
+; CHECK-32-P10-NEXT: lwz 3, 1(3)
+; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
+; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
+; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i8* %b to float*
%add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
@@ -246,21 +366,21 @@ entry:
define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testFloat3:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: lis 6, 1
-; CHECK-64-DAG: rlwinm 4, 4, 2, 28, 29
-; CHECK-64-DAG: addi 7, 1, -32
-; CHECK-64-DAG: lwzx 6, 3, 6
-; CHECK-64-DAG: stxv 34, -32(1)
-; CHECK-64-DAG: stwx 6, 7, 4
-; CHECK-64-DAG: li 4, 1
-; CHECK-64-DAG: lxv 0, -32(1)
-; CHECK-64-DAG: rldic 4, 4, 36, 27
-; CHECK-64-DAG: lwzx 3, 3, 4
-; CHECK-64-DAG: rlwinm 4, 5, 2, 28, 29
-; CHECK-64-DAG: addi 5, 1, -16
-; CHECK-64-DAG: stxv 0, -16(1)
-; CHECK-64-DAG: stwx 3, 5, 4
-; CHECK-64-DAG: lxv 34, -16(1)
+; CHECK-64-NEXT: lis 6, 1
+; CHECK-64-NEXT: rlwinm 4, 4, 2, 28, 29
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: lwzx 6, 3, 6
+; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: stwx 6, 7, 4
+; CHECK-64-NEXT: li 4, 1
+; CHECK-64-NEXT: lxv 0, -32(1)
+; CHECK-64-NEXT: rldic 4, 4, 36, 27
+; CHECK-64-NEXT: lwzx 3, 3, 4
+; CHECK-64-NEXT: rlwinm 4, 5, 2, 28, 29
+; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: stxv 0, -16(1)
+; CHECK-64-NEXT: stwx 3, 5, 4
+; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testFloat3:
@@ -279,6 +399,29 @@ define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 ze
; CHECK-32-NEXT: stwx 3, 4, 5
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testFloat3:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: plwz 6, 65536(3), 0
+; CHECK-64-P10-NEXT: extsw 4, 4
+; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: vinswlx 2, 4, 6
+; CHECK-64-P10-NEXT: li 4, 1
+; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
+; CHECK-64-P10-NEXT: lwzx 3, 3, 4
+; CHECK-64-P10-NEXT: extsw 4, 5
+; CHECK-64-P10-NEXT: slwi 4, 4, 2
+; CHECK-64-P10-NEXT: vinswlx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testFloat3:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lis 6, 1
+; CHECK-32-P10-NEXT: lwzx 6, 3, 6
+; CHECK-32-P10-NEXT: lwz 3, 0(3)
+; CHECK-32-P10-NEXT: vinswlx 2, 4, 6
+; CHECK-32-P10-NEXT: vinswlx 2, 5, 3
+; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
%0 = bitcast i8* %add.ptr to float*
@@ -309,6 +452,22 @@ define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
; CHECK-32-NEXT: xxinsertw 34, 0, 0
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testFloatImm1:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: xscvdpspn 0, 1
+; CHECK-64-P10-NEXT: xxsldwi 0, 0, 0, 3
+; CHECK-64-P10-NEXT: xxinsertw 34, 0, 0
+; CHECK-64-P10-NEXT: xxinsertw 34, 0, 8
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testFloatImm1:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: xscvdpspn 0, 1
+; CHECK-32-P10-NEXT: xxsldwi 0, 0, 0, 3
+; CHECK-32-P10-NEXT: xxinsertw 34, 0, 0
+; CHECK-32-P10-NEXT: xxinsertw 34, 0, 8
+; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <4 x float> %a, float %b, i32 0
%vecins1 = insertelement <4 x float> %vecins, float %b, i32 2
@@ -339,6 +498,22 @@ define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testFloatImm2:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: lwz 4, 0(3)
+; CHECK-64-P10-NEXT: lwz 3, 4(3)
+; CHECK-64-P10-NEXT: vinsw 2, 4, 0
+; CHECK-64-P10-NEXT: vinsw 2, 3, 8
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testFloatImm2:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lwz 4, 0(3)
+; CHECK-32-P10-NEXT: lwz 3, 4(3)
+; CHECK-32-P10-NEXT: vinsw 2, 4, 0
+; CHECK-32-P10-NEXT: vinsw 2, 3, 8
+; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i32* %b to float*
%add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1
@@ -378,6 +553,25 @@ define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
; CHECK-32-NEXT: xxsldwi 0, 0, 0, 3
; CHECK-32-NEXT: xxinsertw 34, 0, 8
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testFloatImm3:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: plwz 4, 262144(3), 0
+; CHECK-64-P10-NEXT: vinsw 2, 4, 0
+; CHECK-64-P10-NEXT: li 4, 1
+; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25
+; CHECK-64-P10-NEXT: lwzx 3, 3, 4
+; CHECK-64-P10-NEXT: vinsw 2, 3, 8
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testFloatImm3:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lis 4, 4
+; CHECK-32-P10-NEXT: lwzx 4, 3, 4
+; CHECK-32-P10-NEXT: lwz 3, 0(3)
+; CHECK-32-P10-NEXT: vinsw 2, 4, 0
+; CHECK-32-P10-NEXT: vinsw 2, 3, 8
+; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
%0 = bitcast i32* %add.ptr to float*
@@ -410,6 +604,23 @@ define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1)
; CHECK-32-NEXT: stfdx 1, 4, 3
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDouble1:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: extsw 4, 4
+; CHECK-64-P10-NEXT: mffprd 3, 1
+; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDouble1:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: addi 4, 1, -16
+; CHECK-32-P10-NEXT: rlwinm 3, 5, 3, 28, 28
+; CHECK-32-P10-NEXT: stxv 34, -16(1)
+; CHECK-32-P10-NEXT: stfdx 1, 4, 3
+; CHECK-32-P10-NEXT: lxv 34, -16(1)
+; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x double> %a, double %b, i32 %idx1
ret <2 x double> %vecins
@@ -418,19 +629,19 @@ entry:
define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testDouble2:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: ld 6, 0(3)
-; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG: addi 7, 1, -32
-; CHECK-64-DAG: stxv 34, -32(1)
-; CHECK-64-DAG: stdx 6, 7, 4
-; CHECK-64-DAG: li 4, 1
-; CHECK-64-DAG: lxv 0, -32(1)
-; CHECK-64-DAG: ldx 3, 3, 4
-; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28
-; CHECK-64-DAG: addi 5, 1, -16
-; CHECK-64-DAG: stxv 0, -16(1)
-; CHECK-64-DAG: stdx 3, 5, 4
-; CHECK-64-DAG: lxv 34, -16(1)
+; CHECK-64-NEXT: ld 6, 0(3)
+; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: stdx 6, 7, 4
+; CHECK-64-NEXT: li 4, 1
+; CHECK-64-NEXT: lxv 0, -32(1)
+; CHECK-64-NEXT: ldx 3, 3, 4
+; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
+; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: stxv 0, -16(1)
+; CHECK-64-NEXT: stdx 3, 5, 4
+; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testDouble2:
@@ -448,6 +659,34 @@ define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
; CHECK-32-NEXT: stfdx 1, 3, 5
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDouble2:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: ld 6, 0(3)
+; CHECK-64-P10-NEXT: extsw 4, 4
+; CHECK-64-P10-NEXT: pld 3, 1(3), 0
+; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
+; CHECK-64-P10-NEXT: extsw 4, 5
+; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDouble2:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lfd 0, 0(3)
+; CHECK-32-P10-NEXT: addi 6, 1, -32
+; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: stxv 34, -32(1)
+; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
+; CHECK-32-P10-NEXT: stfdx 0, 6, 4
+; CHECK-32-P10-NEXT: lxv 0, -32(1)
+; CHECK-32-P10-NEXT: lfd 1, 1(3)
+; CHECK-32-P10-NEXT: addi 3, 1, -16
+; CHECK-32-P10-NEXT: stxv 0, -16(1)
+; CHECK-32-P10-NEXT: stfdx 1, 3, 5
+; CHECK-32-P10-NEXT: lxv 34, -16(1)
+; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i8* %b to double*
%add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
@@ -462,21 +701,21 @@ entry:
define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-64-LABEL: testDouble3:
; CHECK-64: # %bb.0: # %entry
-; CHECK-64-DAG: lis 6, 1
-; CHECK-64-DAG: rlwinm 4, 4, 3, 28, 28
-; CHECK-64-DAG: addi 7, 1, -32
-; CHECK-64-DAG: ldx 6, 3, 6
-; CHECK-64-DAG: stxv 34, -32(1)
-; CHECK-64-DAG: stdx 6, 7, 4
-; CHECK-64-DAG: li 4, 1
-; CHECK-64-DAG: lxv 0, -32(1)
-; CHECK-64-DAG: rldic 4, 4, 36, 27
-; CHECK-64-DAG: ldx 3, 3, 4
-; CHECK-64-DAG: rlwinm 4, 5, 3, 28, 28
-; CHECK-64-DAG: addi 5, 1, -16
-; CHECK-64-DAG: stxv 0, -16(1)
-; CHECK-64-DAG: stdx 3, 5, 4
-; CHECK-64-DAG: lxv 34, -16(1)
+; CHECK-64-NEXT: lis 6, 1
+; CHECK-64-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-64-NEXT: addi 7, 1, -32
+; CHECK-64-NEXT: ldx 6, 3, 6
+; CHECK-64-NEXT: stxv 34, -32(1)
+; CHECK-64-NEXT: stdx 6, 7, 4
+; CHECK-64-NEXT: li 4, 1
+; CHECK-64-NEXT: lxv 0, -32(1)
+; CHECK-64-NEXT: rldic 4, 4, 36, 27
+; CHECK-64-NEXT: ldx 3, 3, 4
+; CHECK-64-NEXT: rlwinm 4, 5, 3, 28, 28
+; CHECK-64-NEXT: addi 5, 1, -16
+; CHECK-64-NEXT: stxv 0, -16(1)
+; CHECK-64-NEXT: stdx 3, 5, 4
+; CHECK-64-NEXT: lxv 34, -16(1)
; CHECK-64-NEXT: blr
;
; CHECK-32-LABEL: testDouble3:
@@ -495,6 +734,37 @@ define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32
; CHECK-32-NEXT: stfdx 1, 3, 5
; CHECK-32-NEXT: lxv 34, -16(1)
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDouble3:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: pld 6, 65536(3), 0
+; CHECK-64-P10-NEXT: extsw 4, 4
+; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: vinsdlx 2, 4, 6
+; CHECK-64-P10-NEXT: li 4, 1
+; CHECK-64-P10-NEXT: rldic 4, 4, 36, 27
+; CHECK-64-P10-NEXT: ldx 3, 3, 4
+; CHECK-64-P10-NEXT: extsw 4, 5
+; CHECK-64-P10-NEXT: rlwinm 4, 4, 3, 0, 28
+; CHECK-64-P10-NEXT: vinsdlx 2, 4, 3
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDouble3:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lis 6, 1
+; CHECK-32-P10-NEXT: rlwinm 4, 4, 3, 28, 28
+; CHECK-32-P10-NEXT: rlwinm 5, 5, 3, 28, 28
+; CHECK-32-P10-NEXT: lfdx 0, 3, 6
+; CHECK-32-P10-NEXT: addi 6, 1, -32
+; CHECK-32-P10-NEXT: stxv 34, -32(1)
+; CHECK-32-P10-NEXT: stfdx 0, 6, 4
+; CHECK-32-P10-NEXT: lxv 0, -32(1)
+; CHECK-32-P10-NEXT: lfd 1, 0(3)
+; CHECK-32-P10-NEXT: addi 3, 1, -16
+; CHECK-32-P10-NEXT: stxv 0, -16(1)
+; CHECK-32-P10-NEXT: stfdx 1, 3, 5
+; CHECK-32-P10-NEXT: lxv 34, -16(1)
+; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
%0 = bitcast i8* %add.ptr to double*
@@ -521,6 +791,18 @@ define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
; CHECK-32-NEXT: # kill: def $f1 killed $f1 def $vsl1
; CHECK-32-NEXT: xxpermdi 34, 1, 34, 1
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDoubleImm1:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-64-P10-NEXT: xxpermdi 34, 1, 34, 1
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDoubleImm1:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: # kill: def $f1 killed $f1 def $vsl1
+; CHECK-32-P10-NEXT: xxpermdi 34, 1, 34, 1
+; CHECK-32-P10-NEXT: blr
entry:
%vecins = insertelement <2 x double> %a, double %b, i32 0
ret <2 x double> %vecins
@@ -538,6 +820,18 @@ define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) {
; CHECK-32-NEXT: lfd 0, 0(3)
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDoubleImm2:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: lfd 0, 0(3)
+; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDoubleImm2:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lfd 0, 0(3)
+; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
+; CHECK-32-P10-NEXT: blr
entry:
%0 = bitcast i32* %b to double*
%1 = load double, double* %0, align 8
@@ -557,6 +851,18 @@ define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) {
; CHECK-32-NEXT: lfd 0, 4(3)
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDoubleImm3:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: lfd 0, 4(3)
+; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDoubleImm3:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lfd 0, 4(3)
+; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
+; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 1
%0 = bitcast i32* %add.ptr to double*
@@ -579,6 +885,20 @@ define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) {
; CHECK-32-NEXT: lfdx 0, 3, 4
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDoubleImm4:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: lis 4, 4
+; CHECK-64-P10-NEXT: lfdx 0, 3, 4
+; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDoubleImm4:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lis 4, 4
+; CHECK-32-P10-NEXT: lfdx 0, 3, 4
+; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
+; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
%0 = bitcast i32* %add.ptr to double*
@@ -601,6 +921,20 @@ define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) {
; CHECK-32-NEXT: lfd 0, 0(3)
; CHECK-32-NEXT: xxpermdi 34, 0, 34, 1
; CHECK-32-NEXT: blr
+;
+; CHECK-64-P10-LABEL: testDoubleImm5:
+; CHECK-64-P10: # %bb.0: # %entry
+; CHECK-64-P10-NEXT: li 4, 1
+; CHECK-64-P10-NEXT: rldic 4, 4, 38, 25
+; CHECK-64-P10-NEXT: lfdx 0, 3, 4
+; CHECK-64-P10-NEXT: xxpermdi 34, 0, 34, 1
+; CHECK-64-P10-NEXT: blr
+;
+; CHECK-32-P10-LABEL: testDoubleImm5:
+; CHECK-32-P10: # %bb.0: # %entry
+; CHECK-32-P10-NEXT: lfd 0, 0(3)
+; CHECK-32-P10-NEXT: xxpermdi 34, 0, 34, 1
+; CHECK-32-P10-NEXT: blr
entry:
%add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736
%0 = bitcast i32* %add.ptr to double*
More information about the llvm-commits
mailing list