r334249 - [X86] Add builtins for blend with immediate control to enforce target feature requirements and check immediate range.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 7 17:00:21 PDT 2018
Author: ctopper
Date: Thu Jun 7 17:00:21 2018
New Revision: 334249
URL: http://llvm.org/viewvc/llvm-project?rev=334249&view=rev
Log:
[X86] Add builtins for blend with immediate control to enforce target feature requirements and check immediate range.
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx2intrin.h
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/lib/Headers/smmintrin.h
cfe/trunk/lib/Sema/SemaChecking.cpp
cfe/trunk/test/CodeGen/avx-builtins.c
cfe/trunk/test/CodeGen/avx2-builtins.c
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=334249&r1=334248&r2=334249&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Jun 7 17:00:21 2018
@@ -369,6 +369,9 @@ TARGET_BUILTIN(__builtin_ia32_palignr128
TARGET_BUILTIN(__builtin_ia32_insertps128, "V4fV4fV4fIc", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_pblendvb128, "V16cV16cV16cV16c", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_pblendw128, "V8sV8sV8sIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_blendpd, "V2dV2dV2dIi", "nc", "sse4.1")
+TARGET_BUILTIN(__builtin_ia32_blendps, "V4fV4fV4fIi", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_blendvpd, "V2dV2dV2dV2d", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_blendvps, "V4fV4fV4fV4f", "nc", "sse4.1")
TARGET_BUILTIN(__builtin_ia32_packusdw128, "V8sV4iV4i", "nc", "sse4.1")
@@ -477,6 +480,8 @@ TARGET_BUILTIN(__builtin_ia32_vpermilvar
TARGET_BUILTIN(__builtin_ia32_vpermilvarps, "V4fV4fV4i", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilvarpd256, "V4dV4dV4LLi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_vpermilvarps256, "V8fV8fV8i", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_blendpd256, "V4dV4dV4dIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx")
@@ -554,6 +559,7 @@ TARGET_BUILTIN(__builtin_ia32_psubusb256
TARGET_BUILTIN(__builtin_ia32_psubusw256, "V16sV16sV16s", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_palignr256, "V32cV32cV32cIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_pblendvb256, "V32cV32cV32cV32c", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pblendw256, "V16sV16sV16sIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_phaddw256, "V16sV16sV16s", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_phaddd256, "V8iV8iV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_phaddsw256, "V16sV16sV16s", "nc", "avx2")
@@ -603,6 +609,8 @@ TARGET_BUILTIN(__builtin_ia32_psrldi256,
TARGET_BUILTIN(__builtin_ia32_psrld256, "V8iV8iV4i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrlqi256, "V4LLiV4LLii", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_psrlq256, "V4LLiV4LLiV2LLi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pblendd128, "V4iV4iV4iIi", "nc", "avx2")
+TARGET_BUILTIN(__builtin_ia32_pblendd256, "V8iV8iV8iIi", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsi256, "V8iV8iV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permvarsf256, "V8fV8fV8i", "nc", "avx2")
TARGET_BUILTIN(__builtin_ia32_permti256, "V4LLiV4LLiV4LLiIc", "nc", "avx2")
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334249&r1=334248&r2=334249&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jun 7 17:00:21 2018
@@ -9235,6 +9235,27 @@ Value *CodeGenFunction::EmitX86BuiltinEx
Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
}
+ case X86::BI__builtin_ia32_pblendw128:
+ case X86::BI__builtin_ia32_blendpd:
+ case X86::BI__builtin_ia32_blendps:
+ case X86::BI__builtin_ia32_blendpd256:
+ case X86::BI__builtin_ia32_blendps256:
+ case X86::BI__builtin_ia32_pblendw256:
+ case X86::BI__builtin_ia32_pblendd128:
+ case X86::BI__builtin_ia32_pblendd256: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+
+ uint32_t Indices[16];
+ // If there are more than 8 elements, the immediate is used twice so make
+ // sure we handle that.
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
+
+ return Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "blend");
+ }
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512: {
Modified: cfe/trunk/lib/Headers/avx2intrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx2intrin.h?rev=334249&r1=334248&r2=334249&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx2intrin.h (original)
+++ cfe/trunk/lib/Headers/avx2intrin.h Thu Jun 7 17:00:21 2018
@@ -170,24 +170,8 @@ _mm256_blendv_epi8(__m256i __V1, __m256i
}
#define _mm256_blend_epi16(V1, V2, M) \
- (__m256i)__builtin_shufflevector((__v16hi)(__m256i)(V1), \
- (__v16hi)(__m256i)(V2), \
- (((M) & 0x01) ? 16 : 0), \
- (((M) & 0x02) ? 17 : 1), \
- (((M) & 0x04) ? 18 : 2), \
- (((M) & 0x08) ? 19 : 3), \
- (((M) & 0x10) ? 20 : 4), \
- (((M) & 0x20) ? 21 : 5), \
- (((M) & 0x40) ? 22 : 6), \
- (((M) & 0x80) ? 23 : 7), \
- (((M) & 0x01) ? 24 : 8), \
- (((M) & 0x02) ? 25 : 9), \
- (((M) & 0x04) ? 26 : 10), \
- (((M) & 0x08) ? 27 : 11), \
- (((M) & 0x10) ? 28 : 12), \
- (((M) & 0x20) ? 29 : 13), \
- (((M) & 0x40) ? 30 : 14), \
- (((M) & 0x80) ? 31 : 15))
+ (__m256i)__builtin_ia32_pblendw256((__v16hi)(__m256i)(V1), \
+ (__v16hi)(__m256i)(V2), (int)(M))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_cmpeq_epi8(__m256i __a, __m256i __b)
@@ -809,24 +793,12 @@ _mm256_broadcastsi128_si256(__m128i __X)
}
#define _mm_blend_epi32(V1, V2, M) \
- (__m128i)__builtin_shufflevector((__v4si)(__m128i)(V1), \
- (__v4si)(__m128i)(V2), \
- (((M) & 0x01) ? 4 : 0), \
- (((M) & 0x02) ? 5 : 1), \
- (((M) & 0x04) ? 6 : 2), \
- (((M) & 0x08) ? 7 : 3))
+ (__m128i)__builtin_ia32_pblendd128((__v4si)(__m128i)(V1), \
+ (__v4si)(__m128i)(V2), (int)(M))
#define _mm256_blend_epi32(V1, V2, M) \
- (__m256i)__builtin_shufflevector((__v8si)(__m256i)(V1), \
- (__v8si)(__m256i)(V2), \
- (((M) & 0x01) ? 8 : 0), \
- (((M) & 0x02) ? 9 : 1), \
- (((M) & 0x04) ? 10 : 2), \
- (((M) & 0x08) ? 11 : 3), \
- (((M) & 0x10) ? 12 : 4), \
- (((M) & 0x20) ? 13 : 5), \
- (((M) & 0x40) ? 14 : 6), \
- (((M) & 0x80) ? 15 : 7))
+ (__m256i)__builtin_ia32_pblendd256((__v8si)(__m256i)(V1), \
+ (__v8si)(__m256i)(V2), (int)(M))
static __inline__ __m256i __DEFAULT_FN_ATTRS
_mm256_broadcastb_epi8(__m128i __X)
Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=334249&r1=334248&r2=334249&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Thu Jun 7 17:00:21 2018
@@ -1355,12 +1355,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i
/// operand \a V2 is copied to the same position in the destination.
/// \returns A 256-bit vector of [4 x double] containing the copied values.
#define _mm256_blend_pd(V1, V2, M) \
- (__m256d)__builtin_shufflevector((__v4df)(__m256d)(V1), \
- (__v4df)(__m256d)(V2), \
- (((M) & 0x01) ? 4 : 0), \
- (((M) & 0x02) ? 5 : 1), \
- (((M) & 0x04) ? 6 : 2), \
- (((M) & 0x08) ? 7 : 3))
+ (__m256d)__builtin_ia32_blendpd256((__v4df)(__m256d)(V1), \
+ (__v4df)(__m256d)(V2), (int)(M))
/// Merges 32-bit single-precision data values stored in either of the
/// two 256-bit vectors of [8 x float], as specified by the immediate
@@ -1387,16 +1383,8 @@ _mm256_permutevar_ps(__m256 __a, __m256i
/// operand \a V2 is copied to the same position in the destination.
/// \returns A 256-bit vector of [8 x float] containing the copied values.
#define _mm256_blend_ps(V1, V2, M) \
- (__m256)__builtin_shufflevector((__v8sf)(__m256)(V1), \
- (__v8sf)(__m256)(V2), \
- (((M) & 0x01) ? 8 : 0), \
- (((M) & 0x02) ? 9 : 1), \
- (((M) & 0x04) ? 10 : 2), \
- (((M) & 0x08) ? 11 : 3), \
- (((M) & 0x10) ? 12 : 4), \
- (((M) & 0x20) ? 13 : 5), \
- (((M) & 0x40) ? 14 : 6), \
- (((M) & 0x80) ? 15 : 7))
+ (__m256)__builtin_ia32_blendps256((__v8sf)(__m256)(V1), \
+ (__v8sf)(__m256)(V2), (int)(M))
/// Merges 64-bit double-precision data values stored in either of the
/// two 256-bit vectors of [4 x double], as specified by the 256-bit vector
Modified: cfe/trunk/lib/Headers/smmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/smmintrin.h?rev=334249&r1=334248&r2=334249&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/smmintrin.h (original)
+++ cfe/trunk/lib/Headers/smmintrin.h Thu Jun 7 17:00:21 2018
@@ -390,10 +390,8 @@
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [2 x double] containing the copied values.
#define _mm_blend_pd(V1, V2, M) \
- (__m128d)__builtin_shufflevector((__v2df)(__m128d)(V1), \
- (__v2df)(__m128d)(V2), \
- (((M) & 0x01) ? 2 : 0), \
- (((M) & 0x02) ? 3 : 1))
+ (__m128d) __builtin_ia32_blendpd ((__v2df)(__m128d)(V1), \
+ (__v2df)(__m128d)(V2), (int)(M))
/// Returns a 128-bit vector of [4 x float] where the values are selected
/// from either the first or second operand as specified by the third
@@ -420,11 +418,8 @@
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [4 x float] containing the copied values.
#define _mm_blend_ps(V1, V2, M) \
- (__m128)__builtin_shufflevector((__v4sf)(__m128)(V1), (__v4sf)(__m128)(V2), \
- (((M) & 0x01) ? 4 : 0), \
- (((M) & 0x02) ? 5 : 1), \
- (((M) & 0x04) ? 6 : 2), \
- (((M) & 0x08) ? 7 : 3))
+ (__m128) __builtin_ia32_blendps ((__v4sf)(__m128)(V1), \
+ (__v4sf)(__m128)(V2), (int)(M))
/// Returns a 128-bit vector of [2 x double] where the values are
/// selected from either the first or second operand as specified by the
@@ -532,16 +527,8 @@ _mm_blendv_epi8 (__m128i __V1, __m128i _
/// is copied to the same position in the result.
/// \returns A 128-bit vector of [8 x i16] containing the copied values.
#define _mm_blend_epi16(V1, V2, M) \
- (__m128i)__builtin_shufflevector((__v8hi)(__m128i)(V1), \
- (__v8hi)(__m128i)(V2), \
- (((M) & 0x01) ? 8 : 0), \
- (((M) & 0x02) ? 9 : 1), \
- (((M) & 0x04) ? 10 : 2), \
- (((M) & 0x08) ? 11 : 3), \
- (((M) & 0x10) ? 12 : 4), \
- (((M) & 0x20) ? 13 : 5), \
- (((M) & 0x40) ? 14 : 6), \
- (((M) & 0x80) ? 15 : 7))
+ (__m128i) __builtin_ia32_pblendw128 ((__v8hi)(__m128i)(V1), \
+ (__v8hi)(__m128i)(V2), (int)(M))
/* SSE4 Dword Multiply Instructions. */
/// Multiples corresponding elements of two 128-bit vectors of [4 x i32]
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=334249&r1=334248&r2=334249&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Jun 7 17:00:21 2018
@@ -2624,6 +2624,7 @@ bool Sema::CheckX86BuiltinFunctionCall(u
i = 1; l = 0; u = 7;
break;
case X86::BI__builtin_ia32_sha1rnds4:
+ case X86::BI__builtin_ia32_blendpd:
case X86::BI__builtin_ia32_vec_set_v4hi:
case X86::BI__builtin_ia32_vec_set_v4si:
case X86::BI__builtin_ia32_vec_set_v4di:
@@ -2683,6 +2684,9 @@ bool Sema::CheckX86BuiltinFunctionCall(u
case X86::BI__builtin_ia32_vec_ext_v16hi:
i = 1; l = 0; u = 15;
break;
+ case X86::BI__builtin_ia32_pblendd128:
+ case X86::BI__builtin_ia32_blendps:
+ case X86::BI__builtin_ia32_blendpd256:
case X86::BI__builtin_ia32_roundss:
case X86::BI__builtin_ia32_roundsd:
case X86::BI__builtin_ia32_rangepd128_mask:
@@ -2754,6 +2758,10 @@ bool Sema::CheckX86BuiltinFunctionCall(u
case X86::BI__builtin_ia32_fpclassss_mask:
i = 1; l = 0; u = 255;
break;
+ case X86::BI__builtin_ia32_pblendw128:
+ case X86::BI__builtin_ia32_pblendw256:
+ case X86::BI__builtin_ia32_blendps256:
+ case X86::BI__builtin_ia32_pblendd256:
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512:
Modified: cfe/trunk/test/CodeGen/avx-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx-builtins.c?rev=334249&r1=334248&r2=334249&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx-builtins.c Thu Jun 7 17:00:21 2018
@@ -59,7 +59,7 @@ __m256 test_mm256_andnot_ps(__m256 A, __
__m256d test_mm256_blend_pd(__m256d A, __m256d B) {
// CHECK-LABEL: test_mm256_blend_pd
// CHECK: shufflevector <4 x double> %{{.*}}, <4 x double> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
- return _mm256_blend_pd(A, B, 0x35);
+ return _mm256_blend_pd(A, B, 0x05);
}
__m256 test_mm256_blend_ps(__m256 A, __m256 B) {
Modified: cfe/trunk/test/CodeGen/avx2-builtins.c
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/avx2-builtins.c?rev=334249&r1=334248&r2=334249&view=diff
==============================================================================
--- cfe/trunk/test/CodeGen/avx2-builtins.c (original)
+++ cfe/trunk/test/CodeGen/avx2-builtins.c Thu Jun 7 17:00:21 2018
@@ -141,7 +141,7 @@ __m128i test_mm_blend_epi32(__m128i a, _
// CHECK-LABEL: test_mm_blend_epi32
// CHECK-NOT: @llvm.x86.avx2.pblendd.128
// CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
- return _mm_blend_epi32(a, b, 0x35);
+ return _mm_blend_epi32(a, b, 0x05);
}
__m256i test_mm256_blend_epi32(__m256i a, __m256i b) {
More information about the cfe-commits
mailing list