[llvm] 543b901 - [LegalizeVectorTypes] Improve SplitVecRes_INSERT_SUBVECTOR to handle subvector being in the high half of the split or not at element 0 of the low half.

Tue Mar 2 21:14:22 PST 2021

Author: Craig Topper
Date: 2021-03-02T21:14:13-08:00
New Revision: 543b901e581b732225db4277a8e995ac8dde62b7

URL: https://github.com/llvm/llvm-project/commit/543b901e581b732225db4277a8e995ac8dde62b7
DIFF: https://github.com/llvm/llvm-project/commit/543b901e581b732225db4277a8e995ac8dde62b7.diff

LOG: [LegalizeVectorTypes] Improve SplitVecRes_INSERT_SUBVECTOR to handle subvector being in the high half of the split or not at element 0 of the low half.

This function isn't exercised in lit tests today according to the
code coverage report, but it will be after the tests in D97543 and
D97559.

Posting this patch to help with a crash that Fraser hit.

Reviewed By: frasercrmck

Differential Revision: https://reviews.llvm.org/D97582

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index dc0614a3d938..f70406416347 100644

--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1258,22 +1258,25 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_SUBVECTOR(SDNode *N, SDValue &Lo,
   GetSplitVector(Vec, Lo, Hi);
 
   EVT VecVT = Vec.getValueType();
+  EVT LoVT = Lo.getValueType();
   unsigned VecElems = VecVT.getVectorNumElements();
   unsigned SubElems = SubVec.getValueType().getVectorNumElements();
+  unsigned LoElems = LoVT.getVectorNumElements();
 
-  // If we know the index is 0, and we know the subvector doesn't cross the
-  // boundary between the halves, we can avoid spilling the vector, and insert
-  // into the lower half of the split vector directly.
-  // TODO: The IdxVal == 0 constraint is artificial, we could do this whenever
-  // there is no boundary crossing. But those cases don't seem to get hit in
-  // practice.
+  // If we know the index is in the first half, and we know the subvector
+  // doesn't cross the boundary between the halves, we can avoid spilling the
+  // vector, and insert into the lower half of the split vector directly.
+  // Similarly if the subvector is fully in the high half.
   unsigned IdxVal = cast<ConstantSDNode>(Idx)->getZExtValue();
-  if ((IdxVal == 0) && (IdxVal + SubElems <= VecElems / 2)) {
-    EVT LoVT, HiVT;
-    std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
+  if (IdxVal >= 0 && IdxVal + SubElems <= LoElems) {
     Lo = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, LoVT, Lo, SubVec, Idx);
     return;
   }
+  if (IdxVal >= LoElems && IdxVal + SubElems <= VecElems) {
+    Hi = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, Hi.getValueType(), Hi, SubVec,
+                     DAG.getVectorIdxConstant(IdxVal - LoElems, dl));
+    return;
+  }
 
   // Spill the vector to the stack.
   // In cases where the vector is illegal it will be broken down into parts

diff  --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
index fbc6dcfbe207..00ab5a70b946 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-insert-subvector.ll
@@ -229,20 +229,14 @@ define void @insert_v8i32_v2i32_2(<8 x i32>* %vp, <2 x i32>* %svp) {
 ;
 ; LMULMAX1-LABEL: insert_v8i32_v2i32_2:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -32
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 32
 ; LMULMAX1-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
 ; LMULMAX1-NEXT:    vle32.v v25, (a1)
 ; LMULMAX1-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT:    vle32.v v26, (a0)
-; LMULMAX1-NEXT:    vse32.v v26, (sp)
-; LMULMAX1-NEXT:    addi a1, sp, 8
-; LMULMAX1-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT:    vse32.v v25, (a1)
+; LMULMAX1-NEXT:    vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT:    vslideup.vi v26, v25, 2
 ; LMULMAX1-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT:    vle32.v v25, (sp)
-; LMULMAX1-NEXT:    vse32.v v25, (a0)
-; LMULMAX1-NEXT:    addi sp, sp, 32
+; LMULMAX1-NEXT:    vse32.v v26, (a0)
 ; LMULMAX1-NEXT:    ret
   %sv = load <2 x i32>, <2 x i32>* %svp
   %vec = load <8 x i32>, <8 x i32>* %vp
@@ -266,22 +260,15 @@ define void @insert_v8i32_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
 ;
 ; LMULMAX1-LABEL: insert_v8i32_v2i32_6:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -32
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 32
 ; LMULMAX1-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
 ; LMULMAX1-NEXT:    vle32.v v25, (a1)
 ; LMULMAX1-NEXT:    addi a0, a0, 16
 ; LMULMAX1-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
 ; LMULMAX1-NEXT:    vle32.v v26, (a0)
-; LMULMAX1-NEXT:    addi a1, sp, 16
-; LMULMAX1-NEXT:    vse32.v v26, (a1)
-; LMULMAX1-NEXT:    addi a2, sp, 24
-; LMULMAX1-NEXT:    vsetivli a3, 2, e32,m1,ta,mu
-; LMULMAX1-NEXT:    vse32.v v25, (a2)
-; LMULMAX1-NEXT:    vsetivli a2, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT:    vle32.v v25, (a1)
-; LMULMAX1-NEXT:    vse32.v v25, (a0)
-; LMULMAX1-NEXT:    addi sp, sp, 32
+; LMULMAX1-NEXT:    vsetivli a1, 4, e32,m1,tu,mu
+; LMULMAX1-NEXT:    vslideup.vi v26, v25, 2
+; LMULMAX1-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
+; LMULMAX1-NEXT:    vse32.v v26, (a0)
 ; LMULMAX1-NEXT:    ret
   %sv = load <2 x i32>, <2 x i32>* %svp
   %vec = load <8 x i32>, <8 x i32>* %vp
@@ -302,20 +289,12 @@ define void @insert_v8i32_undef_v2i32_6(<8 x i32>* %vp, <2 x i32>* %svp) {
 ;
 ; LMULMAX1-LABEL: insert_v8i32_undef_v2i32_6:
 ; LMULMAX1:       # %bb.0:
-; LMULMAX1-NEXT:    addi sp, sp, -32
-; LMULMAX1-NEXT:    .cfi_def_cfa_offset 32
 ; LMULMAX1-NEXT:    vsetivli a2, 2, e32,m1,ta,mu
 ; LMULMAX1-NEXT:    vle32.v v25, (a1)
-; LMULMAX1-NEXT:    addi a1, sp, 24
-; LMULMAX1-NEXT:    vse32.v v25, (a1)
 ; LMULMAX1-NEXT:    vsetivli a1, 4, e32,m1,ta,mu
-; LMULMAX1-NEXT:    vle32.v v25, (sp)
-; LMULMAX1-NEXT:    addi a1, sp, 16
-; LMULMAX1-NEXT:    vle32.v v26, (a1)
-; LMULMAX1-NEXT:    vse32.v v25, (a0)
+; LMULMAX1-NEXT:    vslideup.vi v26, v25, 2
 ; LMULMAX1-NEXT:    addi a0, a0, 16
 ; LMULMAX1-NEXT:    vse32.v v26, (a0)
-; LMULMAX1-NEXT:    addi sp, sp, 32
 ; LMULMAX1-NEXT:    ret
   %sv = load <2 x i32>, <2 x i32>* %svp
   %v = call <8 x i32> @llvm.experimental.vector.insert.v2i32.v8i32(<8 x i32> undef, <2 x i32> %sv, i64 6)
@@ -393,15 +372,38 @@ define void @insert_v32i1_v8i1_0(<32 x i1>* %vp, <8 x i1>* %svp) {
   ret void
 }
 
-; FIXME: SplitVecRes_INSERT_SUBVECTOR crashes on this one when trying to spill
-; to the stack.
-;define void @insert_v32i1_v8i1_16(<32 x i1>* %vp, <8 x i1>* %svp) {
-;  %v = load <32 x i1>, <32 x i1>* %vp
-;  %sv = load <8 x i1>, <8 x i1>* %svp
-;  %c = call <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16)
-;  store <32 x i1> %c, <32 x i1>* %vp
-;  ret void
-;}
+define void @insert_v32i1_v8i1_16(<32 x i1>* %vp, <8 x i1>* %svp) {
+; LMULMAX2-LABEL: insert_v32i1_v8i1_16:
+; LMULMAX2:       # %bb.0:
+; LMULMAX2-NEXT:    addi a2, zero, 32
+; LMULMAX2-NEXT:    vsetvli a3, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vle1.v v25, (a0)
+; LMULMAX2-NEXT:    vsetivli a3, 8, e8,m1,ta,mu
+; LMULMAX2-NEXT:    vle1.v v26, (a1)
+; LMULMAX2-NEXT:    vsetivli a1, 3, e8,m1,tu,mu
+; LMULMAX2-NEXT:    vslideup.vi v25, v26, 2
+; LMULMAX2-NEXT:    vsetvli a1, a2, e8,m2,ta,mu
+; LMULMAX2-NEXT:    vse1.v v25, (a0)
+; LMULMAX2-NEXT:    ret
+;
+; LMULMAX1-LABEL: insert_v32i1_v8i1_16:
+; LMULMAX1:       # %bb.0:
+; LMULMAX1-NEXT:    addi a0, a0, 2
+; LMULMAX1-NEXT:    vsetivli a2, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle1.v v25, (a0)
+; LMULMAX1-NEXT:    vsetivli a2, 8, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vle1.v v26, (a1)
+; LMULMAX1-NEXT:    vsetivli a1, 1, e8,m1,tu,mu
+; LMULMAX1-NEXT:    vslideup.vi v25, v26, 0
+; LMULMAX1-NEXT:    vsetivli a1, 16, e8,m1,ta,mu
+; LMULMAX1-NEXT:    vse1.v v25, (a0)
+; LMULMAX1-NEXT:    ret
+  %v = load <32 x i1>, <32 x i1>* %vp
+  %sv = load <8 x i1>, <8 x i1>* %svp
+  %c = call <32 x i1> @llvm.experimental.vector.insert.v8i1.v32i1(<32 x i1> %v, <8 x i1> %sv, i64 16)
+  store <32 x i1> %c, <32 x i1>* %vp
+  ret void
+}
 
 define void @insert_v8i1_v4i1_0(<8 x i1>* %vp, <4 x i1>* %svp) {
 ; CHECK-LABEL: insert_v8i1_v4i1_0: