[PATCH] D84463: [LegalizeTypes] Teach DAGTypeLegalizer::GenWidenVectorLoads to pad with undef if needed when concatenating small or loads to match a larger load

Thu Jul 23 13:42:38 PDT 2020

craig.topper updated this revision to Diff 280247.
craig.topper added a comment.

-Add CHECK lines to the test


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D84463/new/

https://reviews.llvm.org/D84463

Files:
  llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
  llvm/test/CodeGen/X86/pr46820.ll


Index: llvm/test/CodeGen/X86/pr46820.ll
===================================================================

--- /dev/null
+++ llvm/test/CodeGen/X86/pr46820.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s
+
+; The alignment of 16 causes type legalization to split this as 3 loads,
+; v16f32, v4f32, and v4f32. There was an issue with type legalization building
+; the proper concat_vectors for this because the two v4f32s don't add up to
+; v16f32 and require padding.
+
+define <23 x float> @load23(<23 x float>* %p) {
+; CHECK-LABEL: load23:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    movq %rdi, %rax
+; CHECK-NEXT:    vmovups 64(%rsi), %ymm0
+; CHECK-NEXT:    vmovups (%rsi), %zmm1
+; CHECK-NEXT:    vmovaps 64(%rsi), %xmm2
+; CHECK-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT:    vmovss %xmm3, 88(%rdi)
+; CHECK-NEXT:    vmovaps %xmm2, 64(%rdi)
+; CHECK-NEXT:    vmovaps %zmm1, (%rdi)
+; CHECK-NEXT:    vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT:    vmovlps %xmm0, 80(%rdi)
+; CHECK-NEXT:    vzeroupper
+; CHECK-NEXT:    retq
+  %t0 = load <23 x float>, <23 x float>* %p, align 16
+  ret <23 x float> %t0
+}
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4964,19 +4964,6 @@
                       LD->getPointerInfo().getWithOffset(Offset),
                       LD->getOriginalAlign(), MMOFlags, AAInfo);
       LdChain.push_back(L.getValue(1));
-      if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
-        // Later code assumes the vector loads produced will be mergeable, so we
-        // must pad the final entry up to the previous width. Scalars are
-        // combined separately.
-        SmallVector<SDValue, 16> Loads;
-        Loads.push_back(L);
-        unsigned size = L->getValueSizeInBits(0);
-        while (size < LdOp->getValueSizeInBits(0)) {
-          Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
-          size += L->getValueSizeInBits(0);
-        }
-        L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
-      }
     } else {
       L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
                       LD->getPointerInfo().getWithOffset(Offset),
@@ -5017,8 +5004,17 @@
     EVT NewLdTy = LdOps[i].getValueType();
     if (NewLdTy != LdTy) {
       // Create a larger vector.
+      unsigned NumOps = NewLdTy.getSizeInBits() / LdTy.getSizeInBits();
+      assert(NewLdTy.getSizeInBits() % LdTy.getSizeInBits() == 0);
+      SmallVector<SDValue, 16> WidenOps(NumOps);
+      unsigned j = 0;
+      for (; j != End-Idx; ++j)
+        WidenOps[j] = ConcatOps[Idx+j];
+      for (; j != NumOps; ++j)
+        WidenOps[j] = DAG.getUNDEF(LdTy);
+
       ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
-                                     makeArrayRef(&ConcatOps[Idx], End - Idx));
+                                     WidenOps);
       Idx = End - 1;
       LdTy = NewLdTy;
     }


-------------- next part --------------
A non-text attachment was scrubbed...
Name: D84463.280247.patch
Type: text/x-patch
Size: 3231 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200723/7ba3212f/attachment.bin>