[llvm] ff769dd - [PowerPC] Minor improvement for insert_vector_elt codegen
Nemanja Ivanovic via llvm-commits
llvm-commits at lists.llvm.org
Fri Apr 16 16:55:32 PDT 2021
Author: Nemanja Ivanovic
Date: 2021-04-16T18:52:37-05:00
New Revision: ff769dd11128839e00eea546f7e68680d9acfd77
URL: https://github.com/llvm/llvm-project/commit/ff769dd11128839e00eea546f7e68680d9acfd77
DIFF: https://github.com/llvm/llvm-project/commit/ff769dd11128839e00eea546f7e68680d9acfd77.diff
LOG: [PowerPC] Minor improvement for insert_vector_elt codegen
For v2f64, all VSX subtargets can insert an element with a single
XXPERMDI.
Added:
Modified:
llvm/lib/Target/PowerPC/PPCISelLowering.cpp
llvm/lib/Target/PowerPC/PPCInstrVSX.td
llvm/test/CodeGen/PowerPC/swaps-le-6.ll
llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
index 48dba751a230d..d4efb2ba66517 100644
--- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -878,6 +878,7 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
if (Subtarget.hasVSX()) {
setOperationAction(ISD::FDIV, MVT::v4f32, Legal);
setOperationAction(ISD::FSQRT, MVT::v4f32, Legal);
+ setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
}
if (Subtarget.hasP8Altivec())
@@ -1247,10 +1248,8 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i64, Legal);
}
- if (Subtarget.isISA3_1()) {
+ if (Subtarget.isISA3_1())
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i64, Custom);
- setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f64, Custom);
- }
}
if (Subtarget.pairedVectorMemops()) {
@@ -10341,6 +10340,9 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
SDValue V2 = Op.getOperand(1);
SDValue V3 = Op.getOperand(2);
+ if (VT == MVT::v2f64 && C)
+ return Op;
+
if (Subtarget.isISA3_1()) {
// On P10, we have legal lowering for constant and variable indices for
// integer vectors.
@@ -10353,7 +10355,7 @@ SDValue PPCTargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
if (VT == MVT::v4f32 || VT == MVT::v2f64) {
if (!C || (VT == MVT::v4f32 && dyn_cast<LoadSDNode>(V2)))
return DAG.getNode(PPCISD::VECINSERT, dl, VT, V1, V2, V3);
- return SDValue();
+ return Op;
}
}
diff --git a/llvm/lib/Target/PowerPC/PPCInstrVSX.td b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
index 471ab32f8778f..869e06c493653 100644
--- a/llvm/lib/Target/PowerPC/PPCInstrVSX.td
+++ b/llvm/lib/Target/PowerPC/PPCInstrVSX.td
@@ -2907,6 +2907,10 @@ def : Pat<WToDPExtractConv.BV02U,
(v2f64 (XVCVUXWDP $A))>;
def : Pat<WToDPExtractConv.BV13U,
(v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 3)))>;
+def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)),
+ (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>;
+def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
+ (v2f64 (XXPERMDI $A, (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>;
} // HasVSX, IsBigEndian
// Any little endian VSX subtarget.
@@ -3012,6 +3016,10 @@ def : Pat<WToDPExtractConv.BV02U,
(v2f64 (XVCVUXWDP (XXSLDWI $A, $A, 1)))>;
def : Pat<WToDPExtractConv.BV13U,
(v2f64 (XVCVUXWDP $A))>;
+def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 0)),
+ (v2f64 (XXPERMDI $A, (SUBREG_TO_REG (i64 1), $B, sub_64), 0))>;
+def : Pat<(v2f64 (insertelt v2f64:$A, f64:$B, 1)),
+ (v2f64 (XXPERMDI (SUBREG_TO_REG (i64 1), $B, sub_64), $A, 1))>;
} // HasVSX, IsLittleEndian
// Any pre-Power9 VSX subtarget.
diff --git a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
index 4437e67992697..e3934ed2a0312 100644
--- a/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
+++ b/llvm/test/CodeGen/PowerPC/swaps-le-6.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -relocation-model=pic -verify-machineinstrs -mcpu=pwr8 -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: -O3 < %s | FileCheck %s
@@ -21,41 +22,48 @@
define void @bar0() {
; CHECK-LABEL: bar0:
-; CHECK: # %bb.0: # %entry
-; CHECK: addis r3, r2, .LC0@toc@ha
-; CHECK: addis r4, r2, .LC1@toc@ha
-; CHECK: ld r3, .LC0@toc@l(r3)
-; CHECK: addis r3, r2, .LC2@toc@ha
-; CHECK: ld r3, .LC2@toc@l(r3)
-; CHECK: xxmrgld vs0, vs0, vs1
-; CHECK: stxvd2x vs0, 0, r3
-; CHECK: blr
-;
-; CHECK-P9-NOVECTOR-LABEL: bar0:
-; CHECK-P9-NOVECTOR: # %bb.0: # %entry
-; CHECK-P9-NOVECTOR: addis r3, r2, .LC0@toc@ha
-; CHECK-P9-NOVECTOR: ld r3, .LC0@toc@l(r3)
-; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha
-; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha
-; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3)
-; CHECK-P9-NOVECTOR: xxmrgld vs0, vs1, vs0
-; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3
-; CHECK-P9-NOVECTOR: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: addis r4, r2, .LC1@toc@ha
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: lfdx f0, 0, r3
+; CHECK-NEXT: ld r3, .LC1@toc@l(r4)
+; CHECK-NEXT: lxvd2x vs1, 0, r3
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: addis r3, r2, .LC2@toc@ha
+; CHECK-NEXT: ld r3, .LC2@toc@l(r3)
+; CHECK-NEXT: xxmrgld vs0, vs0, vs1
+; CHECK-NEXT: stxvd2x vs0, 0, r3
+; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: bar0:
-; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9: addis r3, r2, .LC0@toc@ha
-; CHECK-P9: ld r3, .LC0@toc@l(r3)
-; CHECK-P9: lxvx vs0, 0, r3
-; CHECK-P9: addis r3, r2, .LC1@toc@ha
-; CHECK-P9: ld r3, .LC1@toc@l(r3)
-; CHECK-P9: lfd f1, 0(r3)
-; CHECK-P9: addis r3, r2, .LC2@toc@ha
-; CHECK-P9: ld r3, .LC2@toc@l(r3)
-; CHECK-P9: xxswapd vs1, f1
-; CHECK-P9: xxpermdi vs0, vs0, vs1, 1
-; CHECK-P9: stxvx vs0, 0, r3
-; CHECK-P9: blr
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NEXT: lxvx vs0, 0, r3
+; CHECK-P9-NEXT: addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NEXT: ld r3, .LC1@toc@l(r3)
+; CHECK-P9-NEXT: lfd f1, 0(r3)
+; CHECK-P9-NEXT: addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NEXT: ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NEXT: xxmrghd vs0, vs0, vs1
+; CHECK-P9-NEXT: stxvx vs0, 0, r3
+; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-NOVECTOR-LABEL: bar0:
+; CHECK-P9-NOVECTOR: # %bb.0: # %entry
+; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC1@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT: lfdx f1, 0, r3
+; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT: xxswapd vs1, vs1
+; CHECK-P9-NOVECTOR-NEXT: xxmrgld vs0, vs1, vs0
+; CHECK-P9-NOVECTOR-NEXT: stxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR-NEXT: blr
entry:
%0 = load <2 x double>, <2 x double>* @x, align 16
%1 = load double, double* @y, align 8
@@ -66,41 +74,48 @@ entry:
define void @bar1() {
; CHECK-LABEL: bar1:
-; CHECK: # %bb.0: # %entry
-; CHECK: addis r3, r2, .LC0@toc@ha
-; CHECK: addis r4, r2, .LC1@toc@ha
-; CHECK: ld r3, .LC0@toc@l(r3)
-; CHECK: addis r3, r2, .LC2@toc@ha
-; CHECK: ld r3, .LC2@toc@l(r3)
-; CHECK: xxpermdi vs0, vs1, vs0, 1
-; CHECK: stxvd2x vs0, 0, r3
-; CHECK: blr
-;
-; CHECK-P9-NOVECTOR-LABEL: bar1:
-; CHECK-P9-NOVECTOR: # %bb.0: # %entry
-; CHECK-P9-NOVECTOR: addis r3, r2, .LC0@toc@ha
-; CHECK-P9-NOVECTOR: ld r3, .LC0@toc@l(r3)
-; CHECK-P9-NOVECTOR: addis r3, r2, .LC1@toc@ha
-; CHECK-P9-NOVECTOR: addis r3, r2, .LC2@toc@ha
-; CHECK-P9-NOVECTOR: ld r3, .LC2@toc@l(r3)
-; CHECK-P9-NOVECTOR: xxpermdi vs0, vs0, vs1, 1
-; CHECK-P9-NOVECTOR: stxvd2x vs0, 0, r3
-; CHECK-P9-NOVECTOR: blr
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-NEXT: addis r4, r2, .LC1@toc@ha
+; CHECK-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-NEXT: lfdx f0, 0, r3
+; CHECK-NEXT: ld r3, .LC1@toc@l(r4)
+; CHECK-NEXT: lxvd2x vs1, 0, r3
+; CHECK-NEXT: xxswapd vs0, vs0
+; CHECK-NEXT: addis r3, r2, .LC2@toc@ha
+; CHECK-NEXT: ld r3, .LC2@toc@l(r3)
+; CHECK-NEXT: xxpermdi vs0, vs1, vs0, 1
+; CHECK-NEXT: stxvd2x vs0, 0, r3
+; CHECK-NEXT: blr
;
; CHECK-P9-LABEL: bar1:
-; CHECK-P9: # %bb.0: # %entry
-; CHECK-P9: addis r3, r2, .LC0@toc@ha
-; CHECK-P9: ld r3, .LC0@toc@l(r3)
-; CHECK-P9: lxvx vs0, 0, r3
-; CHECK-P9: addis r3, r2, .LC1@toc@ha
-; CHECK-P9: ld r3, .LC1@toc@l(r3)
-; CHECK-P9: lfd f1, 0(r3)
-; CHECK-P9: addis r3, r2, .LC2@toc@ha
-; CHECK-P9: ld r3, .LC2@toc@l(r3)
-; CHECK-P9: xxswapd vs1, f1
-; CHECK-P9: xxmrgld vs0, vs1, vs0
-; CHECK-P9: stxvx vs0, 0, r3
-; CHECK-P9: blr
+; CHECK-P9: # %bb.0: # %entry
+; CHECK-P9-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NEXT: lxvx vs0, 0, r3
+; CHECK-P9-NEXT: addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NEXT: ld r3, .LC1@toc@l(r3)
+; CHECK-P9-NEXT: lfd f1, 0(r3)
+; CHECK-P9-NEXT: addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NEXT: ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NEXT: xxpermdi vs0, vs1, vs0, 1
+; CHECK-P9-NEXT: stxvx vs0, 0, r3
+; CHECK-P9-NEXT: blr
+;
+; CHECK-P9-NOVECTOR-LABEL: bar1:
+; CHECK-P9-NOVECTOR: # %bb.0: # %entry
+; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC0@toc@ha
+; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC0@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC1@toc@ha
+; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC1@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT: lfdx f1, 0, r3
+; CHECK-P9-NOVECTOR-NEXT: addis r3, r2, .LC2@toc@ha
+; CHECK-P9-NOVECTOR-NEXT: ld r3, .LC2@toc@l(r3)
+; CHECK-P9-NOVECTOR-NEXT: xxswapd vs1, vs1
+; CHECK-P9-NOVECTOR-NEXT: xxpermdi vs0, vs0, vs1, 1
+; CHECK-P9-NOVECTOR-NEXT: stxvd2x vs0, 0, r3
+; CHECK-P9-NOVECTOR-NEXT: blr
entry:
%0 = load <2 x double>, <2 x double>* @x, align 16
%1 = load double, double* @y, align 8
diff --git a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
index a198604f79a4b..331d7864a2289 100644
--- a/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
+++ b/llvm/test/CodeGen/PowerPC/vsx_insert_extract_le.ll
@@ -3,6 +3,10 @@
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck %s
+; RUN: llc -verify-machineinstrs -mcpu=pwr8 -mattr=+vsx -ppc-vsr-nums-as-vr \
+; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64-unknown-linux-gnu < %s \
+; RUN: | FileCheck %s --check-prefix=CHECK-P8-BE
+
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mattr=-power9-vector -ppc-vsr-nums-as-vr \
; RUN: -ppc-asm-full-reg-names -mtriple=powerpc64le-unknown-linux-gnu < %s \
; RUN: | FileCheck --check-prefix=CHECK-P9-VECTOR %s
@@ -20,6 +24,13 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
; CHECK-NEXT: xxmrghd v2, vs0, vs1
; CHECK-NEXT: blr
;
+; CHECK-P8-BE-LABEL: testi0:
+; CHECK-P8-BE: # %bb.0:
+; CHECK-P8-BE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-BE-NEXT: lfdx f1, 0, r4
+; CHECK-P8-BE-NEXT: xxpermdi v2, vs1, vs0, 1
+; CHECK-P8-BE-NEXT: blr
+;
; CHECK-P9-VECTOR-LABEL: testi0:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
@@ -30,10 +41,9 @@ define <2 x double> @testi0(<2 x double>* %p1, double* %p2) {
;
; CHECK-P9-LABEL: testi0:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lfd f1, 0(r4)
; CHECK-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs1, f1
-; CHECK-P9-NEXT: xxpermdi v2, vs0, vs1, 1
+; CHECK-P9-NEXT: lfd f1, 0(r4)
+; CHECK-P9-NEXT: xxmrghd v2, vs0, vs1
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
%s = load double, double* %p2
@@ -52,6 +62,13 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
; CHECK-NEXT: xxpermdi v2, vs1, vs0, 1
; CHECK-NEXT: blr
;
+; CHECK-P8-BE-LABEL: testi1:
+; CHECK-P8-BE: # %bb.0:
+; CHECK-P8-BE-NEXT: lxvd2x vs0, 0, r3
+; CHECK-P8-BE-NEXT: lfdx f1, 0, r4
+; CHECK-P8-BE-NEXT: xxmrghd v2, vs0, vs1
+; CHECK-P8-BE-NEXT: blr
+;
; CHECK-P9-VECTOR-LABEL: testi1:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
@@ -62,10 +79,9 @@ define <2 x double> @testi1(<2 x double>* %p1, double* %p2) {
;
; CHECK-P9-LABEL: testi1:
; CHECK-P9: # %bb.0:
-; CHECK-P9-NEXT: lfd f1, 0(r4)
; CHECK-P9-NEXT: lxv vs0, 0(r3)
-; CHECK-P9-NEXT: xxswapd vs1, f1
-; CHECK-P9-NEXT: xxmrgld v2, vs1, vs0
+; CHECK-P9-NEXT: lfd f1, 0(r4)
+; CHECK-P9-NEXT: xxpermdi v2, vs1, vs0, 1
; CHECK-P9-NEXT: blr
%v = load <2 x double>, <2 x double>* %p1
%s = load double, double* %p2
@@ -82,6 +98,11 @@ define double @teste0(<2 x double>* %p1) {
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; CHECK-NEXT: blr
;
+; CHECK-P8-BE-LABEL: teste0:
+; CHECK-P8-BE: # %bb.0:
+; CHECK-P8-BE-NEXT: lfdx f1, 0, r3
+; CHECK-P8-BE-NEXT: blr
+;
; CHECK-P9-VECTOR-LABEL: teste0:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs1, 0, r3
@@ -107,6 +128,11 @@ define double @teste1(<2 x double>* %p1) {
; CHECK-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; CHECK-NEXT: blr
;
+; CHECK-P8-BE-LABEL: teste1:
+; CHECK-P8-BE: # %bb.0:
+; CHECK-P8-BE-NEXT: lfd f1, 8(r3)
+; CHECK-P8-BE-NEXT: blr
+;
; CHECK-P9-VECTOR-LABEL: teste1:
; CHECK-P9-VECTOR: # %bb.0:
; CHECK-P9-VECTOR-NEXT: lxvd2x vs0, 0, r3
More information about the llvm-commits
mailing list