r334266 - [X86] Add builtins for shufps and shufpd to enable target feature and immediate range checking.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Fri Jun 8 00:18:33 PDT 2018
Author: ctopper
Date: Fri Jun 8 00:18:33 2018
New Revision: 334266
URL: http://llvm.org/viewvc/llvm-project?rev=334266&view=rev
Log:
[X86] Add builtins for shufps and shufpd to enable target feature and immediate range checking.
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avxintrin.h
cfe/trunk/lib/Headers/emmintrin.h
cfe/trunk/lib/Headers/xmmintrin.h
cfe/trunk/lib/Sema/SemaChecking.cpp
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=334266&r1=334265&r2=334266&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Fri Jun 8 00:18:33 2018
@@ -316,6 +316,7 @@ TARGET_BUILTIN(__builtin_ia32_rsqrtps, "
TARGET_BUILTIN(__builtin_ia32_rsqrtss, "V4fV4f", "nc", "sse")
TARGET_BUILTIN(__builtin_ia32_sqrtps, "V4fV4f", "nc", "sse")
TARGET_BUILTIN(__builtin_ia32_sqrtss, "V4fV4f", "nc", "sse")
+TARGET_BUILTIN(__builtin_ia32_shufps, "V4fV4fV4fIi", "nc", "sse")
TARGET_BUILTIN(__builtin_ia32_maskmovdqu, "vV16cV16cc*", "n", "sse2")
TARGET_BUILTIN(__builtin_ia32_movmskpd, "iV2d", "nc", "sse2")
@@ -327,6 +328,7 @@ TARGET_BUILTIN(__builtin_ia32_pshufhw, "
TARGET_BUILTIN(__builtin_ia32_psadbw128, "V2LLiV16cV16c", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_sqrtpd, "V2dV2d", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_sqrtsd, "V2dV2d", "nc", "sse2")
+TARGET_BUILTIN(__builtin_ia32_shufpd, "V2dV2dV2dIi", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtpd2dq, "V2LLiV2d", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvtpd2ps, "V4fV2d", "nc", "sse2")
TARGET_BUILTIN(__builtin_ia32_cvttpd2dq, "V4iV2d", "nc", "sse2")
@@ -487,6 +489,8 @@ TARGET_BUILTIN(__builtin_ia32_blendpd256
TARGET_BUILTIN(__builtin_ia32_blendps256, "V8fV8fV8fIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_blendvpd256, "V4dV4dV4dV4d", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_blendvps256, "V8fV8fV8fV8f", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_shufpd256, "V4dV4dV4dIi", "nc", "avx")
+TARGET_BUILTIN(__builtin_ia32_shufps256, "V8fV8fV8fIi", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_dpps256, "V8fV8fV8fIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd, "V2dV2dV2dIc", "nc", "avx")
TARGET_BUILTIN(__builtin_ia32_cmppd256, "V4dV4dV4dIc", "nc", "avx")
@@ -1536,6 +1540,8 @@ TARGET_BUILTIN(__builtin_ia32_shuf_f32x4
TARGET_BUILTIN(__builtin_ia32_shuf_f64x2, "V8dV8dV8dIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_shuf_i32x4, "V16iV16iV16iIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_shuf_i64x2, "V8LLiV8LLiV8LLiIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shufpd512, "V8dV8dV8dIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_shufps512, "V16fV16fV16fIi", "nc", "avx512f")
TARGET_BUILTIN(__builtin_ia32_shuf_f32x4_256, "V8fV8fV8fIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_shuf_f64x2_256, "V4dV4dV4dIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_shuf_i32x4_256, "V8iV8iV8iIi", "nc", "avx512vl")
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334266&r1=334265&r2=334266&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Fri Jun 8 00:18:33 2018
@@ -9403,6 +9403,36 @@ Value *CodeGenFunction::EmitX86BuiltinEx
makeArrayRef(Indices, NumElts),
"permil");
}
+ case X86::BI__builtin_ia32_shufpd:
+ case X86::BI__builtin_ia32_shufpd256:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512: {
+ uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+ llvm::Type *Ty = Ops[0]->getType();
+ unsigned NumElts = Ty->getVectorNumElements();
+ unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
+ unsigned NumLaneElts = NumElts / NumLanes;
+
+ // Splat the 8-bits of immediate 4 times to help the loop wrap around.
+ Imm = (Imm & 0xff) * 0x01010101;
+
+ uint32_t Indices[16];
+ for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
+ for (unsigned i = 0; i != NumLaneElts; ++i) {
+ unsigned Index = Imm % NumLaneElts;
+ Imm /= NumLaneElts;
+ if (i >= (NumLaneElts / 2))
+ Index += NumElts;
+ Indices[l + i] = l + Index;
+ }
+ }
+
+ return Builder.CreateShuffleVector(Ops[0], Ops[1],
+ makeArrayRef(Indices, NumElts),
+ "shufp");
+ }
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512: {
Modified: cfe/trunk/lib/Headers/avxintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avxintrin.h?rev=334266&r1=334265&r2=334266&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avxintrin.h (original)
+++ cfe/trunk/lib/Headers/avxintrin.h Fri Jun 8 00:18:33 2018
@@ -1516,16 +1516,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b,
/// 11: Bits [127:96] and [255:224] are copied from the selected operand.
/// \returns A 256-bit vector of [8 x float] containing the shuffled values.
#define _mm256_shuffle_ps(a, b, mask) \
- (__m256)__builtin_shufflevector((__v8sf)(__m256)(a), \
- (__v8sf)(__m256)(b), \
- 0 + (((mask) >> 0) & 0x3), \
- 0 + (((mask) >> 2) & 0x3), \
- 8 + (((mask) >> 4) & 0x3), \
- 8 + (((mask) >> 6) & 0x3), \
- 4 + (((mask) >> 0) & 0x3), \
- 4 + (((mask) >> 2) & 0x3), \
- 12 + (((mask) >> 4) & 0x3), \
- 12 + (((mask) >> 6) & 0x3))
+ (__m256)__builtin_ia32_shufps256((__v8sf)(__m256)(a), \
+ (__v8sf)(__m256)(b), (int)(mask))
/// Selects four double-precision values from the 256-bit operands of
/// [4 x double], as specified by the immediate value operand.
@@ -1570,12 +1562,8 @@ _mm256_blendv_ps(__m256 __a, __m256 __b,
/// destination.
/// \returns A 256-bit vector of [4 x double] containing the shuffled values.
#define _mm256_shuffle_pd(a, b, mask) \
- (__m256d)__builtin_shufflevector((__v4df)(__m256d)(a), \
- (__v4df)(__m256d)(b), \
- 0 + (((mask) >> 0) & 0x1), \
- 4 + (((mask) >> 1) & 0x1), \
- 2 + (((mask) >> 2) & 0x1), \
- 6 + (((mask) >> 3) & 0x1))
+ (__m256d)__builtin_ia32_shufpd256((__v4df)(__m256d)(a), \
+ (__v4df)(__m256d)(b), (int)(mask))
/* Compare */
#define _CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */
Modified: cfe/trunk/lib/Headers/emmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/emmintrin.h?rev=334266&r1=334265&r2=334266&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/emmintrin.h (original)
+++ cfe/trunk/lib/Headers/emmintrin.h Fri Jun 8 00:18:33 2018
@@ -4757,9 +4757,8 @@ _mm_movemask_pd(__m128d __a)
/// Bit[1] = 1: upper element of \a b copied to upper element of result. \n
/// \returns A 128-bit vector of [2 x double] containing the shuffled values.
#define _mm_shuffle_pd(a, b, i) \
- (__m128d)__builtin_shufflevector((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
- 0 + (((i) >> 0) & 0x1), \
- 2 + (((i) >> 1) & 0x1))
+ (__m128d)__builtin_ia32_shufpd((__v2df)(__m128d)(a), (__v2df)(__m128d)(b), \
+ (int)(i))
/// Casts a 128-bit floating-point vector of [2 x double] into a 128-bit
/// floating-point vector of [4 x float].
Modified: cfe/trunk/lib/Headers/xmmintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/xmmintrin.h?rev=334266&r1=334265&r2=334266&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/xmmintrin.h (original)
+++ cfe/trunk/lib/Headers/xmmintrin.h Fri Jun 8 00:18:33 2018
@@ -2605,11 +2605,8 @@ void _mm_setcsr(unsigned int __i);
/// 11: Bits [127:96] copied from the specified operand.
/// \returns A 128-bit vector of [4 x float] containing the shuffled values.
#define _mm_shuffle_ps(a, b, mask) \
- (__m128)__builtin_shufflevector((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
- 0 + (((mask) >> 0) & 0x3), \
- 0 + (((mask) >> 2) & 0x3), \
- 4 + (((mask) >> 4) & 0x3), \
- 4 + (((mask) >> 6) & 0x3))
+ (__m128)__builtin_ia32_shufps((__v4sf)(__m128)(a), (__v4sf)(__m128)(b), \
+ (int)(mask))
/// Unpacks the high-order (index 2,3) values from two 128-bit vectors of
/// [4 x float] and interleaves them into a 128-bit vector of [4 x float].
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=334266&r1=334265&r2=334266&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Fri Jun 8 00:18:33 2018
@@ -2654,6 +2654,7 @@ bool Sema::CheckX86BuiltinFunctionCall(u
break;
case X86::BI__builtin_ia32_sha1rnds4:
case X86::BI__builtin_ia32_blendpd:
+ case X86::BI__builtin_ia32_shufpd:
case X86::BI__builtin_ia32_vec_set_v4hi:
case X86::BI__builtin_ia32_vec_set_v4si:
case X86::BI__builtin_ia32_vec_set_v4di:
@@ -2721,6 +2722,7 @@ bool Sema::CheckX86BuiltinFunctionCall(u
case X86::BI__builtin_ia32_pblendd128:
case X86::BI__builtin_ia32_blendps:
case X86::BI__builtin_ia32_blendpd256:
+ case X86::BI__builtin_ia32_shufpd256:
case X86::BI__builtin_ia32_roundss:
case X86::BI__builtin_ia32_roundsd:
case X86::BI__builtin_ia32_rangepd128_mask:
@@ -2824,6 +2826,10 @@ bool Sema::CheckX86BuiltinFunctionCall(u
case X86::BI__builtin_ia32_shuf_f64x2:
case X86::BI__builtin_ia32_shuf_i32x4:
case X86::BI__builtin_ia32_shuf_i64x2:
+ case X86::BI__builtin_ia32_shufpd512:
+ case X86::BI__builtin_ia32_shufps:
+ case X86::BI__builtin_ia32_shufps256:
+ case X86::BI__builtin_ia32_shufps512:
case X86::BI__builtin_ia32_dbpsadbw128_mask:
case X86::BI__builtin_ia32_dbpsadbw256_mask:
case X86::BI__builtin_ia32_dbpsadbw512_mask:
More information about the cfe-commits
mailing list