[PATCH] D102751: [X86] Limit X86InterleavedAccessGroup to handle the same type case only
Pengfei Wang via Phabricator via llvm-commits
llvm-commits at lists.llvm.org
Wed May 19 07:50:53 PDT 2021
This revision was automatically updated to reflect the committed changes.
Closed by commit rG9d09d20448e4: Reapply "[X86] Limit X86InterleavedAccessGroup to handle the same type case… (authored by pengfei).
Changed prior to commit:
https://reviews.llvm.org/D102751?vs=346390&id=346460#toc
Repository:
rG LLVM Github Monorepo
CHANGES SINCE LAST ACTION
https://reviews.llvm.org/D102751/new/
https://reviews.llvm.org/D102751
Files:
llvm/lib/Target/X86/X86InterleavedAccess.cpp
llvm/test/CodeGen/X86/x86-interleaved-access.ll
Index: llvm/test/CodeGen/X86/x86-interleaved-access.ll
===================================================================
--- llvm/test/CodeGen/X86/x86-interleaved-access.ll
+++ llvm/test/CodeGen/X86/x86-interleaved-access.ll
@@ -1930,3 +1930,22 @@
store <16 x i64> %r, <16 x i64>* %d, align 8
ret void
}
+
+define <2 x i64> @PR37616(<16 x i64>* %a0) {
+; AVX1-LABEL: PR37616:
+; AVX1: # %bb.0:
+; AVX1-NEXT: vmovaps 16(%rdi), %xmm0
+; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
+; AVX1-NEXT: retq
+;
+; AVX2OR512-LABEL: PR37616:
+; AVX2OR512: # %bb.0:
+; AVX2OR512-NEXT: vmovaps (%rdi), %ymm0
+; AVX2OR512-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
+; AVX2OR512-NEXT: vextractf128 $1, %ymm0, %xmm0
+; AVX2OR512-NEXT: vzeroupper
+; AVX2OR512-NEXT: retq
+ %load = load <16 x i64>, <16 x i64>* %a0, align 128
+ %shuffle = shufflevector <16 x i64> %load, <16 x i64> undef, <2 x i32> <i32 2, i32 6>
+ ret <2 x i64> %shuffle
+}
Index: llvm/lib/Target/X86/X86InterleavedAccess.cpp
===================================================================
--- llvm/lib/Target/X86/X86InterleavedAccess.cpp
+++ llvm/lib/Target/X86/X86InterleavedAccess.cpp
@@ -724,30 +724,34 @@
auto *ShuffleTy = cast<FixedVectorType>(Shuffles[0]->getType());
if (isa<LoadInst>(Inst)) {
- // Try to generate target-sized register(/instruction).
- decompose(Inst, Factor, ShuffleTy, DecomposedVectors);
-
auto *ShuffleEltTy = cast<FixedVectorType>(Inst->getType());
unsigned NumSubVecElems = ShuffleEltTy->getNumElements() / Factor;
- // Perform matrix-transposition in order to compute interleaved
- // results by generating some sort of (optimized) target-specific
- // instructions.
-
switch (NumSubVecElems) {
default:
return false;
case 4:
- transpose_4x4(DecomposedVectors, TransposedVectors);
- break;
case 8:
case 16:
case 32:
case 64:
- deinterleave8bitStride3(DecomposedVectors, TransposedVectors,
- NumSubVecElems);
+ if (ShuffleTy->getNumElements() != NumSubVecElems)
+ return false;
break;
}
+ // Try to generate target-sized register(/instruction).
+ decompose(Inst, Factor, ShuffleTy, DecomposedVectors);
+
+ // Perform matrix-transposition in order to compute interleaved
+ // results by generating some sort of (optimized) target-specific
+ // instructions.
+
+ if (NumSubVecElems == 4)
+ transpose_4x4(DecomposedVectors, TransposedVectors);
+ else
+ deinterleave8bitStride3(DecomposedVectors, TransposedVectors,
+ NumSubVecElems);
+
// Now replace the unoptimized-interleaved-vectors with the
// transposed-interleaved vectors.
for (unsigned i = 0, e = Shuffles.size(); i < e; ++i)
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D102751.346460.patch
Type: text/x-patch
Size: 2862 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20210519/6ec7c2ef/attachment.bin>
More information about the llvm-commits
mailing list