[llvm] a5fa778 - [LegalizeTypes] Scalarize non-byte sized loads in WidenVecRes_LOAD and SplitVecRes_LOAD

Craig Topper via llvm-commits llvm-commits at lists.llvm.org
Mon Feb 24 15:14:57 PST 2020


Author: Craig Topper
Date: 2020-02-24T15:14:33-08:00
New Revision: a5fa77888243beede98959ced17596b380e8de59

URL: https://github.com/llvm/llvm-project/commit/a5fa77888243beede98959ced17596b380e8de59
DIFF: https://github.com/llvm/llvm-project/commit/a5fa77888243beede98959ced17596b380e8de59.diff

LOG: [LegalizeTypes] Scalarize non-byte sized loads in WidenVecRes_LOAD and SplitVecRes_LOAD

Should fix PR42803 and PR44902

Differential Revision: https://reviews.llvm.org/D74590

Added: 
    

Modified: 
    llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll
    llvm/test/CodeGen/X86/load-local-v3i1.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 5e4c35263b7f..5a4b4c615bc0 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -1505,6 +1505,14 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
   EVT LoMemVT, HiMemVT;
   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
 
+  if (!LoMemVT.isByteSized() || !HiMemVT.isByteSized()) {
+    SDValue Value, NewChain;
+    std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
+    std::tie(Lo, Hi) = DAG.SplitVector(Value, dl);
+    ReplaceValueWith(SDValue(LD, 1), NewChain);
+    return;
+  }
+
   Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset,
                    LD->getPointerInfo(), LoMemVT, Alignment, MMOFlags, AAInfo);
 
@@ -3667,6 +3675,20 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) {
   LoadSDNode *LD = cast<LoadSDNode>(N);
   ISD::LoadExtType ExtType = LD->getExtensionType();
 
+  // A vector must always be stored in memory as-is, i.e. without any padding
+  // between the elements, since various code depends on it, e.g. in the
+  // handling of a bitcast of a vector type to int, which may be done with a
+  // vector store followed by an integer load. A vector that does not have
+  // a byte-sized memory type must therefore be loaded via scalarization
+  // instead of being widened.
+  if (!LD->getMemoryVT().isByteSized()) {
+    SDValue Value, NewChain;
+    std::tie(Value, NewChain) = TLI.scalarizeVectorLoad(LD, DAG);
+    ReplaceValueWith(SDValue(LD, 0), Value);
+    ReplaceValueWith(SDValue(LD, 1), NewChain);
+    return SDValue();
+  }
+
   SDValue Result;
   SmallVector<SDValue, 16> LdChain;  // Chain for the series of load
   if (ExtType != ISD::NON_EXTLOAD)

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f3f9c6dd7003..6dc99e761656 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -6584,12 +6584,48 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
   SDValue Chain = LD->getChain();
   SDValue BasePTR = LD->getBasePtr();
   EVT SrcVT = LD->getMemoryVT();
+  EVT DstVT = LD->getValueType(0);
   ISD::LoadExtType ExtType = LD->getExtensionType();
 
   unsigned NumElem = SrcVT.getVectorNumElements();
 
   EVT SrcEltVT = SrcVT.getScalarType();
-  EVT DstEltVT = LD->getValueType(0).getScalarType();
+  EVT DstEltVT = DstVT.getScalarType();
+
+  // A vector must always be stored in memory as-is, i.e. without any padding
+  // between the elements, since various code depends on it, e.g. in the
+  // handling of a bitcast of a vector type to int, which may be done with a
+  // vector store followed by an integer load. A vector that does not have
+  // elements that are byte-sized must therefore be loaded as a single integer
+  // from which the vector elements are extracted by shifting and truncating.
+  if (!SrcEltVT.isByteSized()) {
+    unsigned NumBits = SrcVT.getSizeInBits();
+    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), NumBits);
+
+    SDValue Load = DAG.getLoad(IntVT, SL, Chain, BasePTR, LD->getPointerInfo(),
+                               LD->getAlignment(),
+                               LD->getMemOperand()->getFlags(),
+                               LD->getAAInfo());
+
+    SmallVector<SDValue, 8> Vals;
+    for (unsigned Idx = 0; Idx < NumElem; ++Idx) {
+      unsigned ShiftIntoIdx =
+          (DAG.getDataLayout().isBigEndian() ? (NumElem - 1) - Idx : Idx);
+      SDValue ShiftAmount =
+          DAG.getConstant(ShiftIntoIdx * SrcEltVT.getSizeInBits(), SL, IntVT);
+      SDValue ShiftedElt =
+          DAG.getNode(ISD::SRL, SL, IntVT, Load, ShiftAmount);
+      SDValue Scalar = DAG.getNode(ISD::TRUNCATE, SL, SrcEltVT, ShiftedElt);
+      if (ExtType != ISD::NON_EXTLOAD) {
+        unsigned ExtendOp = ISD::getExtForLoadExtType(false, ExtType);
+        Scalar = DAG.getNode(ExtendOp, SL, DstEltVT, Scalar);
+      }
+      Vals.push_back(Scalar);
+    }
+
+    SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
+    return std::make_pair(Value, Load.getValue(1));
+  }
 
   unsigned Stride = SrcEltVT.getSizeInBits() / 8;
   assert(SrcEltVT.isByteSized());
@@ -6611,7 +6647,7 @@ TargetLowering::scalarizeVectorLoad(LoadSDNode *LD,
   }
 
   SDValue NewChain = DAG.getNode(ISD::TokenFactor, SL, MVT::Other, LoadChains);
-  SDValue Value = DAG.getBuildVector(LD->getValueType(0), SL, Vals);
+  SDValue Value = DAG.getBuildVector(DstVT, SL, Vals);
 
   return std::make_pair(Value, NewChain);
 }

diff  --git a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll
index 45434ed27196..67d9b0641b85 100644
--- a/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll
+++ b/llvm/test/CodeGen/SystemZ/store_nonbytesized_vecs.ll
@@ -121,18 +121,20 @@ define void @fun2(<8 x i32> %src, <8 x i31>* %p)
 define void @fun3(<3 x i31>* %src, <3 x i31>* %p)
 ; CHECK-LABEL: fun3:
 ; CHECK:       # %bb.0:
-; CHECK-NEXT:    llgf %r0, 3(%r2)
-; CHECK-NEXT:    llgf %r1, 6(%r2)
-; CHECK-NEXT:    llgf %r2, 0(%r2)
-; CHECK-NEXT:    rosbg %r1, %r0, 0, 32, 31
-; CHECK-NEXT:    sllg %r4, %r2, 62
-; CHECK-NEXT:    rosbg %r4, %r0, 0, 32, 31
-; CHECK-NEXT:    srlg %r0, %r4, 32
-; CHECK-NEXT:    st %r1, 8(%r3)
-; CHECK-NEXT:    sllg %r1, %r2, 30
-; CHECK-NEXT:    lr %r1, %r0
-; CHECK-NEXT:    nihh %r1, 8191
-; CHECK-NEXT:    stg %r1, 0(%r3)
+; CHECK-NEXT:    l %r0, 8(%r2)
+; CHECK-NEXT:    lg %r1, 0(%r2)
+; CHECK-NEXT:    sllg %r2, %r1, 32
+; CHECK-NEXT:    lr %r2, %r0
+; CHECK-NEXT:    srlg %r0, %r2, 62
+; CHECK-NEXT:    st %r2, 8(%r3)
+; CHECK-NEXT:    rosbg %r0, %r1, 33, 61, 34
+; CHECK-NEXT:    sllg %r1, %r0, 62
+; CHECK-NEXT:    rosbg %r1, %r2, 2, 32, 0
+; CHECK-NEXT:    srlg %r1, %r1, 32
+; CHECK-NEXT:    sllg %r0, %r0, 30
+; CHECK-NEXT:    lr %r0, %r1
+; CHECK-NEXT:    nihh %r0, 8191
+; CHECK-NEXT:    stg %r0, 0(%r3)
 ; CHECK-NEXT:    br %r14
 {
   %tmp = load <3 x i31>, <3 x i31>* %src

diff  --git a/llvm/test/CodeGen/X86/load-local-v3i1.ll b/llvm/test/CodeGen/X86/load-local-v3i1.ll
index 6be3cc1b751a..da432cb1ab34 100644
--- a/llvm/test/CodeGen/X86/load-local-v3i1.ll
+++ b/llvm/test/CodeGen/X86/load-local-v3i1.ll
@@ -96,27 +96,21 @@ define void @local_load_v3i1(i32 addrspace(1)* %out, i32 addrspace(1)* %in, <3 x
 ; CHECK-NEXT:    pushq %rbx
 ; CHECK-NEXT:    pushq %rax
 ; CHECK-NEXT:    movq %rdi, %r14
-; CHECK-NEXT:    movzbl (%rdx), %ebp
-; CHECK-NEXT:    movl %ebp, %eax
-; CHECK-NEXT:    shrl %eax
-; CHECK-NEXT:    andl $1, %eax
-; CHECK-NEXT:    movl %ebp, %ecx
-; CHECK-NEXT:    andl $1, %ecx
-; CHECK-NEXT:    movd %ecx, %xmm0
-; CHECK-NEXT:    pinsrd $1, %eax, %xmm0
-; CHECK-NEXT:    shrl $2, %ebp
-; CHECK-NEXT:    andl $1, %ebp
-; CHECK-NEXT:    pinsrd $2, %ebp, %xmm0
-; CHECK-NEXT:    movd %xmm0, %ebx
-; CHECK-NEXT:    pextrd $1, %xmm0, %r15d
+; CHECK-NEXT:    movb (%rdx), %al
+; CHECK-NEXT:    movl %eax, %ecx
+; CHECK-NEXT:    shrb $2, %cl
+; CHECK-NEXT:    movzbl %al, %r15d
+; CHECK-NEXT:    shrb %al
+; CHECK-NEXT:    movzbl %al, %ebx
+; CHECK-NEXT:    movzbl %cl, %ebp
 ; CHECK-NEXT:    movq %rsi, %rdi
-; CHECK-NEXT:    movl %ebx, %esi
-; CHECK-NEXT:    movl %r15d, %edx
+; CHECK-NEXT:    movl %r15d, %esi
+; CHECK-NEXT:    movl %ebx, %edx
 ; CHECK-NEXT:    movl %ebp, %ecx
 ; CHECK-NEXT:    callq masked_load_v3
 ; CHECK-NEXT:    movq %r14, %rdi
-; CHECK-NEXT:    movl %ebx, %esi
-; CHECK-NEXT:    movl %r15d, %edx
+; CHECK-NEXT:    movl %r15d, %esi
+; CHECK-NEXT:    movl %ebx, %edx
 ; CHECK-NEXT:    movl %ebp, %ecx
 ; CHECK-NEXT:    callq masked_store4_v3
 ; CHECK-NEXT:    addq $8, %rsp


        


More information about the llvm-commits mailing list