r334237 - [X86] Add builtins for VALIGNQ/VALIGND to enable proper target feature checking.
Craig Topper via cfe-commits
cfe-commits at lists.llvm.org
Thu Jun 7 14:27:41 PDT 2018
Author: ctopper
Date: Thu Jun 7 14:27:41 2018
New Revision: 334237
URL: http://llvm.org/viewvc/llvm-project?rev=334237&view=rev
Log:
[X86] Add builtins for VALIGNQ/VALIGND to enable proper target feature checking.
We still emit shufflevector instructions we just do it from CGBuiltin.cpp now. This ensures the intrinsics that use this are only available on CPUs that support the feature.
I also added range checking to the immediate, but only checked it is 8 bits or smaller. We should maybe be stricter since we never use all 8 bits, but gcc doesn't seem to do that.
Modified:
cfe/trunk/include/clang/Basic/BuiltinsX86.def
cfe/trunk/lib/CodeGen/CGBuiltin.cpp
cfe/trunk/lib/Headers/avx512fintrin.h
cfe/trunk/lib/Headers/avx512vlintrin.h
cfe/trunk/lib/Sema/SemaChecking.cpp
Modified: cfe/trunk/include/clang/Basic/BuiltinsX86.def
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/Basic/BuiltinsX86.def?rev=334237&r1=334236&r2=334237&view=diff
==============================================================================
--- cfe/trunk/include/clang/Basic/BuiltinsX86.def (original)
+++ cfe/trunk/include/clang/Basic/BuiltinsX86.def Thu Jun 7 14:27:41 2018
@@ -909,6 +909,12 @@ TARGET_BUILTIN(__builtin_ia32_storeupd51
TARGET_BUILTIN(__builtin_ia32_storeapd512_mask, "vV8d*V8dUc", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_storeups512_mask, "vf*V16fUs", "n", "avx512f")
TARGET_BUILTIN(__builtin_ia32_storeaps512_mask, "vV16f*V16fUs", "n", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignq512, "V8LLiV8LLiV8LLiIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignd512, "V16iV16iV16iIi", "nc", "avx512f")
+TARGET_BUILTIN(__builtin_ia32_alignd128, "V4iV4iV4iIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignd256, "V8iV8iV8iIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq128, "V2LLiV2LLiV2LLiIi", "nc", "avx512vl")
+TARGET_BUILTIN(__builtin_ia32_alignq256, "V4LLiV4LLiV4LLiIi", "nc", "avx512vl")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd128, "V4iV4iV4iV4i", "nc", "avx512vl,avx512vnni")
TARGET_BUILTIN(__builtin_ia32_vpdpbusd256, "V8iV8iV8iV8i", "nc", "avx512vl,avx512vnni")
Modified: cfe/trunk/lib/CodeGen/CGBuiltin.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CGBuiltin.cpp?rev=334237&r1=334236&r2=334237&view=diff
==============================================================================
--- cfe/trunk/lib/CodeGen/CGBuiltin.cpp (original)
+++ cfe/trunk/lib/CodeGen/CGBuiltin.cpp Thu Jun 7 14:27:41 2018
@@ -9222,6 +9222,26 @@ Value *CodeGenFunction::EmitX86BuiltinEx
makeArrayRef(Indices, NumElts),
"palignr");
}
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
+ case X86::BI__builtin_ia32_alignq512: {
+ unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
+ unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
+
+ // Mask the shift amount to width of two vectors.
+ ShiftVal &= (2 * NumElts) - 1;
+
+ uint32_t Indices[16];
+ for (unsigned i = 0; i != NumElts; ++i)
+ Indices[i] = i + ShiftVal;
+
+ return Builder.CreateShuffleVector(Ops[1], Ops[0],
+ makeArrayRef(Indices, NumElts),
+ "valign");
+ }
case X86::BI__builtin_ia32_vperm2f128_pd256:
case X86::BI__builtin_ia32_vperm2f128_ps256:
Modified: cfe/trunk/lib/Headers/avx512fintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512fintrin.h?rev=334237&r1=334236&r2=334237&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512fintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512fintrin.h Thu Jun 7 14:27:41 2018
@@ -3465,16 +3465,8 @@ _mm512_maskz_permutex2var_epi64(__mmask8
}
#define _mm512_alignr_epi64(A, B, I) \
- (__m512i)__builtin_shufflevector((__v8di)(__m512i)(B), \
- (__v8di)(__m512i)(A), \
- ((int)(I) & 0x7) + 0, \
- ((int)(I) & 0x7) + 1, \
- ((int)(I) & 0x7) + 2, \
- ((int)(I) & 0x7) + 3, \
- ((int)(I) & 0x7) + 4, \
- ((int)(I) & 0x7) + 5, \
- ((int)(I) & 0x7) + 6, \
- ((int)(I) & 0x7) + 7)
+ (__m512i)__builtin_ia32_alignq512((__v8di)(__m512i)(A), \
+ (__v8di)(__m512i)(B), (int)(I))
#define _mm512_mask_alignr_epi64(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectq_512((__mmask8)(U), \
@@ -3487,24 +3479,8 @@ _mm512_maskz_permutex2var_epi64(__mmask8
(__v8di)_mm512_setzero_si512())
#define _mm512_alignr_epi32(A, B, I) \
- (__m512i)__builtin_shufflevector((__v16si)(__m512i)(B), \
- (__v16si)(__m512i)(A), \
- ((int)(I) & 0xf) + 0, \
- ((int)(I) & 0xf) + 1, \
- ((int)(I) & 0xf) + 2, \
- ((int)(I) & 0xf) + 3, \
- ((int)(I) & 0xf) + 4, \
- ((int)(I) & 0xf) + 5, \
- ((int)(I) & 0xf) + 6, \
- ((int)(I) & 0xf) + 7, \
- ((int)(I) & 0xf) + 8, \
- ((int)(I) & 0xf) + 9, \
- ((int)(I) & 0xf) + 10, \
- ((int)(I) & 0xf) + 11, \
- ((int)(I) & 0xf) + 12, \
- ((int)(I) & 0xf) + 13, \
- ((int)(I) & 0xf) + 14, \
- ((int)(I) & 0xf) + 15)
+ (__m512i)__builtin_ia32_alignd512((__v16si)(__m512i)(A), \
+ (__v16si)(__m512i)(B), (int)(I))
#define _mm512_mask_alignr_epi32(W, U, A, B, imm) \
(__m512i)__builtin_ia32_selectd_512((__mmask16)(U), \
Modified: cfe/trunk/lib/Headers/avx512vlintrin.h
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Headers/avx512vlintrin.h?rev=334237&r1=334236&r2=334237&view=diff
==============================================================================
--- cfe/trunk/lib/Headers/avx512vlintrin.h (original)
+++ cfe/trunk/lib/Headers/avx512vlintrin.h Thu Jun 7 14:27:41 2018
@@ -8082,12 +8082,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8
}
#define _mm_alignr_epi32(A, B, imm) \
- (__m128i)__builtin_shufflevector((__v4si)(__m128i)(B), \
- (__v4si)(__m128i)(A), \
- ((int)(imm) & 0x3) + 0, \
- ((int)(imm) & 0x3) + 1, \
- ((int)(imm) & 0x3) + 2, \
- ((int)(imm) & 0x3) + 3)
+ (__m128i)__builtin_ia32_alignd128((__v4si)(__m128i)(A), \
+ (__v4si)(__m128i)(B), (int)(imm))
#define _mm_mask_alignr_epi32(W, U, A, B, imm) \
(__m128i)__builtin_ia32_selectd_128((__mmask8)(U), \
@@ -8100,16 +8096,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8
(__v4si)_mm_setzero_si128())
#define _mm256_alignr_epi32(A, B, imm) \
- (__m256i)__builtin_shufflevector((__v8si)(__m256i)(B), \
- (__v8si)(__m256i)(A), \
- ((int)(imm) & 0x7) + 0, \
- ((int)(imm) & 0x7) + 1, \
- ((int)(imm) & 0x7) + 2, \
- ((int)(imm) & 0x7) + 3, \
- ((int)(imm) & 0x7) + 4, \
- ((int)(imm) & 0x7) + 5, \
- ((int)(imm) & 0x7) + 6, \
- ((int)(imm) & 0x7) + 7)
+ (__m256i)__builtin_ia32_alignd256((__v8si)(__m256i)(A), \
+ (__v8si)(__m256i)(B), (int)(imm))
#define _mm256_mask_alignr_epi32(W, U, A, B, imm) \
(__m256i)__builtin_ia32_selectd_256((__mmask8)(U), \
@@ -8122,10 +8110,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8
(__v8si)_mm256_setzero_si256())
#define _mm_alignr_epi64(A, B, imm) \
- (__m128i)__builtin_shufflevector((__v2di)(__m128i)(B), \
- (__v2di)(__m128i)(A), \
- ((int)(imm) & 0x1) + 0, \
- ((int)(imm) & 0x1) + 1)
+ (__m128i)__builtin_ia32_alignq128((__v2di)(__m128i)(A), \
+ (__v2di)(__m128i)(B), (int)(imm))
#define _mm_mask_alignr_epi64(W, U, A, B, imm) \
(__m128i)__builtin_ia32_selectq_128((__mmask8)(U), \
@@ -8138,12 +8124,8 @@ _mm256_maskz_permutexvar_epi32(__mmask8
(__v2di)_mm_setzero_si128())
#define _mm256_alignr_epi64(A, B, imm) \
- (__m256i)__builtin_shufflevector((__v4di)(__m256i)(B), \
- (__v4di)(__m256i)(A), \
- ((int)(imm) & 0x3) + 0, \
- ((int)(imm) & 0x3) + 1, \
- ((int)(imm) & 0x3) + 2, \
- ((int)(imm) & 0x3) + 3)
+ (__m256i)__builtin_ia32_alignq256((__v4di)(__m256i)(A), \
+ (__v4di)(__m256i)(B), (int)(imm))
#define _mm256_mask_alignr_epi64(W, U, A, B, imm) \
(__m256i)__builtin_ia32_selectq_256((__mmask8)(U), \
Modified: cfe/trunk/lib/Sema/SemaChecking.cpp
URL: http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Sema/SemaChecking.cpp?rev=334237&r1=334236&r2=334237&view=diff
==============================================================================
--- cfe/trunk/lib/Sema/SemaChecking.cpp (original)
+++ cfe/trunk/lib/Sema/SemaChecking.cpp Thu Jun 7 14:27:41 2018
@@ -2712,6 +2712,12 @@ bool Sema::CheckX86BuiltinFunctionCall(u
case X86::BI__builtin_ia32_palignr128:
case X86::BI__builtin_ia32_palignr256:
case X86::BI__builtin_ia32_palignr512:
+ case X86::BI__builtin_ia32_alignq512:
+ case X86::BI__builtin_ia32_alignd512:
+ case X86::BI__builtin_ia32_alignd128:
+ case X86::BI__builtin_ia32_alignd256:
+ case X86::BI__builtin_ia32_alignq128:
+ case X86::BI__builtin_ia32_alignq256:
case X86::BI__builtin_ia32_vcomisd:
case X86::BI__builtin_ia32_vcomiss:
case X86::BI__builtin_ia32_dbpsadbw128_mask:
More information about the cfe-commits
mailing list