[PATCH] D84463: [LegalizeTypes] Teach DAGTypeLegalizer::GenWidenVectorLoads to pad with undef if needed when concatenating smaller loads to match a larger load
Craig Topper via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 23 13:42:38 PDT 2020
craig.topper updated this revision to Diff 280247.
craig.topper added a comment.
-Add CHECK lines to the test
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D84463/new/
https://reviews.llvm.org/D84463
Files:
llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
llvm/test/CodeGen/X86/pr46820.ll
Index: llvm/test/CodeGen/X86/pr46820.ll
===================================================================
--- /dev/null
+++ llvm/test/CodeGen/X86/pr46820.ll
@@ -0,0 +1,26 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=avx512f | FileCheck %s
+
+; The alignment of 16 causes type legalization to split this as 3 loads,
+; v16f32, v4f32, and v4f32. There was an issue with type legalization building
+; the proper concat_vectors for this because the two v4f32s don't add up to
+; v16f32 and require padding.
+
+define <23 x float> @load23(<23 x float>* %p) {
+; CHECK-LABEL: load23:
+; CHECK: # %bb.0:
+; CHECK-NEXT: movq %rdi, %rax
+; CHECK-NEXT: vmovups 64(%rsi), %ymm0
+; CHECK-NEXT: vmovups (%rsi), %zmm1
+; CHECK-NEXT: vmovaps 64(%rsi), %xmm2
+; CHECK-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
+; CHECK-NEXT: vmovss %xmm3, 88(%rdi)
+; CHECK-NEXT: vmovaps %xmm2, 64(%rdi)
+; CHECK-NEXT: vmovaps %zmm1, (%rdi)
+; CHECK-NEXT: vextractf128 $1, %ymm0, %xmm0
+; CHECK-NEXT: vmovlps %xmm0, 80(%rdi)
+; CHECK-NEXT: vzeroupper
+; CHECK-NEXT: retq
+ %t0 = load <23 x float>, <23 x float>* %p, align 16
+ ret <23 x float> %t0
+}
Index: llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
===================================================================
--- llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -4964,19 +4964,6 @@
LD->getPointerInfo().getWithOffset(Offset),
LD->getOriginalAlign(), MMOFlags, AAInfo);
LdChain.push_back(L.getValue(1));
- if (L->getValueType(0).isVector() && NewVTWidth >= LdWidth) {
- // Later code assumes the vector loads produced will be mergeable, so we
- // must pad the final entry up to the previous width. Scalars are
- // combined separately.
- SmallVector<SDValue, 16> Loads;
- Loads.push_back(L);
- unsigned size = L->getValueSizeInBits(0);
- while (size < LdOp->getValueSizeInBits(0)) {
- Loads.push_back(DAG.getUNDEF(L->getValueType(0)));
- size += L->getValueSizeInBits(0);
- }
- L = DAG.getNode(ISD::CONCAT_VECTORS, dl, LdOp->getValueType(0), Loads);
- }
} else {
L = DAG.getLoad(NewVT, dl, Chain, BasePtr,
LD->getPointerInfo().getWithOffset(Offset),
@@ -5017,8 +5004,17 @@
EVT NewLdTy = LdOps[i].getValueType();
if (NewLdTy != LdTy) {
// Create a larger vector.
+ unsigned NumOps = NewLdTy.getSizeInBits() / LdTy.getSizeInBits();
+ assert(NewLdTy.getSizeInBits() % LdTy.getSizeInBits() == 0);
+ SmallVector<SDValue, 16> WidenOps(NumOps);
+ unsigned j = 0;
+ for (; j != End-Idx; ++j)
+ WidenOps[j] = ConcatOps[Idx+j];
+ for (; j != NumOps; ++j)
+ WidenOps[j] = DAG.getUNDEF(LdTy);
+
ConcatOps[End-1] = DAG.getNode(ISD::CONCAT_VECTORS, dl, NewLdTy,
- makeArrayRef(&ConcatOps[Idx], End - Idx));
+ WidenOps);
Idx = End - 1;
LdTy = NewLdTy;
}
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D84463.280247.patch
Type: text/x-patch
Size: 3231 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20200723/7ba3212f/attachment.bin>
More information about the llvm-commits mailing list