[PATCH] D37892: [X86] Use native shuffle vector for the perm2f128 intrinsics
Craig Topper via Phabricator via cfe-commits
cfe-commits at lists.llvm.org
Fri Sep 15 11:10:31 PDT 2017
craig.topper updated this revision to Diff 115427.
craig.topper added a comment.
Convert the AVX2 integer intrinsic as well.
https://reviews.llvm.org/D37892
Files:
lib/CodeGen/CGBuiltin.cpp
test/CodeGen/avx-builtins.c
test/CodeGen/avx2-builtins.c
Index: test/CodeGen/avx2-builtins.c
===================================================================
--- test/CodeGen/avx2-builtins.c
+++ test/CodeGen/avx2-builtins.c
@@ -907,8 +907,8 @@
__m256i test_mm256_permute2x128_si256(__m256i a, __m256i b) {
// CHECK-LABEL: test_mm256_permute2x128_si256
- // CHECK: call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, i8 49)
- return _mm256_permute2x128_si256(a, b, 0x31);
+ // CHECK: shufflevector <4 x i64> zeroinitializer, <4 x i64> %{{.*}}, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
+ return _mm256_permute2x128_si256(a, b, 0x38);
}
__m256i test_mm256_permute4x64_epi64(__m256i a) {
Index: test/CodeGen/avx-builtins.c
===================================================================
--- test/CodeGen/avx-builtins.c
+++ test/CodeGen/avx-builtins.c
@@ -678,19 +678,19 @@
__m256d test_mm256_permute2f128_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_permute2f128_pd
- // CHECK: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %{{.*}}, <4 x double> %{{.*}}, i8 49)
+ // CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
return _mm256_permute2f128_pd(A, B, 0x31);
}
__m256 test_mm256_permute2f128_ps(__m256 A, __m256 B) {
// CHECK-LABEL: test_mm256_permute2f128_ps
- // CHECK: call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %{{.*}}, <8 x float> %{{.*}}, i8 19)
+ // CHECK: shufflevector <8 x float> %{{.*}}, <8 x float> %{{.*}}, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
return _mm256_permute2f128_ps(A, B, 0x13);
}
__m256i test_mm256_permute2f128_si256(__m256i A, __m256i B) {
// CHECK-LABEL: test_mm256_permute2f128_si256
- // CHECK: call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %{{.*}}, <8 x i32> %{{.*}}, i8 32)
+ // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
return _mm256_permute2f128_si256(A, B, 0x20);
}
Index: lib/CodeGen/CGBuiltin.cpp
===================================================================
--- lib/CodeGen/CGBuiltin.cpp
+++ lib/CodeGen/CGBuiltin.cpp
@@ -7923,6 +7923,45 @@
return EmitX86Select(*this, Ops[4], Align, Ops[3]);
}
+ case X86::BI__builtin_ia32_vperm2f128_pd256:
+ case X86::BI__builtin_ia32_vperm2f128_ps256:
+ case X86::BI__builtin_ia32_vperm2f128_si256:
+ case X86::BI__builtin_ia32_permti256: {
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+
+ // This takes a very simple approach since there are two lanes and a
+ // shuffle can have 2 inputs. So we reserve the first input for the first
+ // lane and the second input for the second lane. This may result in
+ // duplicate sources, but this can be dealt with in the backend.
+
+ Value *OutOps[2];
+ uint32_t Indices[8];
+ for (unsigned l = 0; l != 2; ++l) {
+ // Determine the source for this lane.
+ if (Imm & (1 << ((l * 4) + 3)))
+ OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
+ else if (Imm & (1 << ((l * 4) + 1)))
+ OutOps[l] = Ops[1];
+ else
+ OutOps[l] = Ops[0];
+
+ for (unsigned i = 0; i != NumElts/2; ++i) {
+ // Start with ith element of the source for this lane.
+ unsigned Idx = (l * NumElts) + i;
+ // If bit 0 of the immediate half is set, switch to the high half of
+ // the source.
+ if (Imm & (1 << (l * 4)))
+ Idx += NumElts/2;
+ Indices[(l * (NumElts/2)) + i] = Idx;
+ }
+ }
+
+ return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
+ makeArrayRef(Indices, NumElts),
+ "vperm");
+ }
+
case X86::BI__builtin_ia32_movnti:
case X86::BI__builtin_ia32_movnti64:
case X86::BI__builtin_ia32_movntsd:
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D37892.115427.patch
Type: text/x-patch
Size: 3985 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/cfe-commits/attachments/20170915/876da93a/attachment.bin>
More information about the cfe-commits
mailing list