[llvm-branch-commits] [llvm] eeba70a - [VE] Expand single-element BUILD_VECTOR to INSERT_VECTOR_ELT
Simon Moll via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Jan 8 02:52:43 PST 2021
Author: Simon Moll
Date: 2021-01-08T11:48:01+01:00
New Revision: eeba70a463c70c76868421e08c4036c70bfad994
URL: https://github.com/llvm/llvm-project/commit/eeba70a463c70c76868421e08c4036c70bfad994
DIFF: https://github.com/llvm/llvm-project/commit/eeba70a463c70c76868421e08c4036c70bfad994.diff
LOG: [VE] Expand single-element BUILD_VECTOR to INSERT_VECTOR_ELT
We do this mostly to be able to test the insert_vector_elt isel
patterns. As long as we don't, most single-element insertions show up
as `BUILD_VECTOR` nodes in the backend.
Reviewed By: kaz7
Differential Revision: https://reviews.llvm.org/D93759
Added:
llvm/test/CodeGen/VE/Vector/expand_single_elem_build_vec.ll
Modified:
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/test/CodeGen/VE/Vector/insert_elt.ll
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index a0d00ebca010..a1b464091cd8 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -1602,6 +1602,32 @@ SDValue VETargetLowering::lowerINTRINSIC_WO_CHAIN(SDValue Op,
}
}
+static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
+ if (!isa<BuildVectorSDNode>(N))
+ return false;
+ const auto *BVN = cast<BuildVectorSDNode>(N);
+
+ // Find first non-undef insertion.
+ unsigned Idx;
+ for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
+ auto ElemV = BVN->getOperand(Idx);
+ if (!ElemV->isUndef())
+ break;
+ }
+ // Catch the (hypothetical) all-undef case.
+ if (Idx == BVN->getNumOperands())
+ return false;
+ // Remember insertion.
+ UniqueIdx = Idx++;
+ // Verify that all other insertions are undef.
+ for (; Idx < BVN->getNumOperands(); ++Idx) {
+ auto ElemV = BVN->getOperand(Idx);
+ if (!ElemV->isUndef())
+ return false;
+ }
+ return true;
+}
+
static SDValue getSplatValue(SDNode *N) {
if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
return BuildVec->getSplatValue();
@@ -1615,6 +1641,17 @@ SDValue VETargetLowering::lowerBUILD_VECTOR(SDValue Op,
unsigned NumEls = Op.getValueType().getVectorNumElements();
MVT ElemVT = Op.getSimpleValueType().getVectorElementType();
+ // If there is just one element, expand to INSERT_VECTOR_ELT.
+ unsigned UniqueIdx;
+ if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
+ SDValue AccuV = DAG.getUNDEF(Op.getValueType());
+ auto ElemV = Op->getOperand(UniqueIdx);
+ SDValue IdxV = DAG.getConstant(UniqueIdx, DL, MVT::i64);
+ return DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(), AccuV,
+ ElemV, IdxV);
+ }
+
+ // Else emit a broadcast.
if (SDValue ScalarV = getSplatValue(Op.getNode())) {
// lower to VEC_BROADCAST
MVT LegalResVT = MVT::getVectorVT(ElemVT, 256);
diff --git a/llvm/test/CodeGen/VE/Vector/expand_single_elem_build_vec.ll b/llvm/test/CodeGen/VE/Vector/expand_single_elem_build_vec.ll
new file mode 100644
index 000000000000..42c455319246
--- /dev/null
+++ b/llvm/test/CodeGen/VE/Vector/expand_single_elem_build_vec.ll
@@ -0,0 +1,13 @@
+; RUN: llc < %s -march=ve -mattr=+vpu | FileCheck %s
+
+; Function Attrs: norecurse nounwind readnone
+; Check that a single-element insertion is lowered to an insert_vector_elt node for isel.
+define fastcc <256 x i32> @expand_single_elem_build_vec(i32 %x, i32 %y) {
+; CHECK-LABEL: expand_single_elem_build_vec:
+; CHECK: # %bb.0:
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lsv %v0(42), %s0
+; CHECK-NEXT: b.l.t (, %s10)
+ %r = insertelement <256 x i32> undef, i32 %x, i32 42
+ ret <256 x i32> %r
+}
diff --git a/llvm/test/CodeGen/VE/Vector/insert_elt.ll b/llvm/test/CodeGen/VE/Vector/insert_elt.ll
index 7ccd45690e9d..3004699e26d4 100644
--- a/llvm/test/CodeGen/VE/Vector/insert_elt.ll
+++ b/llvm/test/CodeGen/VE/Vector/insert_elt.ll
@@ -15,9 +15,7 @@ define fastcc <256 x i64> @insert_rr_v256i64(i32 signext %idx, i64 %s) {
define fastcc <256 x i64> @insert_ri7_v256i64(i64 %s) {
; CHECK-LABEL: insert_ri7_v256i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: lsv %v0(127), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i64> undef, i64 %s, i32 127
ret <256 x i64> %ret
@@ -26,9 +24,8 @@ define fastcc <256 x i64> @insert_ri7_v256i64(i64 %s) {
define fastcc <256 x i64> @insert_ri8_v256i64(i64 %s) {
; CHECK-LABEL: insert_ri8_v256i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: lea %s1, 128
+; CHECK-NEXT: lsv %v0(%s1), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i64> undef, i64 %s, i32 128
ret <256 x i64> %ret
@@ -37,9 +34,7 @@ define fastcc <256 x i64> @insert_ri8_v256i64(i64 %s) {
define fastcc <512 x i64> @insert_ri_v512i64(i64 %s) {
; CHECK-LABEL: insert_ri_v512i64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lsv %v1(116), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <512 x i64> undef, i64 %s, i32 372
ret <512 x i64> %ret
@@ -60,9 +55,8 @@ define fastcc <256 x i32> @insert_rr_v256i32(i32 signext %idx, i32 signext %s) {
define fastcc <256 x i32> @insert_ri7_v256i32(i32 signext %s) {
; CHECK-LABEL: insert_ri7_v256i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lsv %v0(127), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i32> undef, i32 %s, i32 127
ret <256 x i32> %ret
@@ -71,9 +65,9 @@ define fastcc <256 x i32> @insert_ri7_v256i32(i32 signext %s) {
define fastcc <256 x i32> @insert_ri8_v256i32(i32 signext %s) {
; CHECK-LABEL: insert_ri8_v256i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea %s1, 128
+; CHECK-NEXT: lsv %v0(%s1), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x i32> undef, i32 %s, i32 128
ret <256 x i32> %ret
@@ -82,9 +76,12 @@ define fastcc <256 x i32> @insert_ri8_v256i32(i32 signext %s) {
define fastcc <512 x i32> @insert_ri_v512i32(i32 signext %s) {
; CHECK-LABEL: insert_ri_v512i32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 512
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: lea %s1, 186
+; CHECK-NEXT: lvs %s2, %v0(%s1)
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: sll %s0, %s0, 32
+; CHECK-NEXT: or %s0, %s2, %s0
+; CHECK-NEXT: lsv %v0(%s1), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <512 x i32> undef, i32 %s, i32 372
ret <512 x i32> %ret
@@ -122,9 +119,7 @@ define fastcc <256 x double> @insert_rr_v256f64(i32 signext %idx, double %s) {
define fastcc <256 x double> @insert_ri7_v256f64(double %s) {
; CHECK-LABEL: insert_ri7_v256f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: lsv %v0(127), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x double> undef, double %s, i32 127
ret <256 x double> %ret
@@ -133,9 +128,8 @@ define fastcc <256 x double> @insert_ri7_v256f64(double %s) {
define fastcc <256 x double> @insert_ri8_v256f64(double %s) {
; CHECK-LABEL: insert_ri8_v256f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: lea %s1, 128
+; CHECK-NEXT: lsv %v0(%s1), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x double> undef, double %s, i32 128
ret <256 x double> %ret
@@ -144,9 +138,7 @@ define fastcc <256 x double> @insert_ri8_v256f64(double %s) {
define fastcc <512 x double> @insert_ri_v512f64(double %s) {
; CHECK-LABEL: insert_ri_v512f64:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v1, %s0
+; CHECK-NEXT: lsv %v1(116), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <512 x double> undef, double %s, i32 372
ret <512 x double> %ret
@@ -166,9 +158,7 @@ define fastcc <256 x float> @insert_rr_v256f32(i32 signext %idx, float %s) {
define fastcc <256 x float> @insert_ri7_v256f32(float %s) {
; CHECK-LABEL: insert_ri7_v256f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: lsv %v0(127), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x float> undef, float %s, i32 127
ret <256 x float> %ret
@@ -177,9 +167,8 @@ define fastcc <256 x float> @insert_ri7_v256f32(float %s) {
define fastcc <256 x float> @insert_ri8_v256f32(float %s) {
; CHECK-LABEL: insert_ri8_v256f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 256
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: lea %s1, 128
+; CHECK-NEXT: lsv %v0(%s1), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <256 x float> undef, float %s, i32 128
ret <256 x float> %ret
@@ -188,9 +177,13 @@ define fastcc <256 x float> @insert_ri8_v256f32(float %s) {
define fastcc <512 x float> @insert_ri_v512f32(float %s) {
; CHECK-LABEL: insert_ri_v512f32:
; CHECK: # %bb.0:
-; CHECK-NEXT: lea %s0, 512
-; CHECK-NEXT: lvl %s0
-; CHECK-NEXT: vbrd %v0, %s0
+; CHECK-NEXT: sra.l %s0, %s0, 32
+; CHECK-NEXT: lea %s1, 186
+; CHECK-NEXT: lvs %s2, %v0(%s1)
+; CHECK-NEXT: and %s2, %s2, (32)0
+; CHECK-NEXT: sll %s0, %s0, 32
+; CHECK-NEXT: or %s0, %s2, %s0
+; CHECK-NEXT: lsv %v0(%s1), %s0
; CHECK-NEXT: b.l.t (, %s10)
%ret = insertelement <512 x float> undef, float %s, i32 372
ret <512 x float> %ret
More information about the llvm-branch-commits
mailing list