[clang] fc3b787 - [X86] Add SHA512 instructions.
Freddy Ye via cfe-commits
cfe-commits at lists.llvm.org
Wed Jul 19 18:44:57 PDT 2023
Author: Freddy Ye
Date: 2023-07-20T09:44:44+08:00
New Revision: fc3b7874b6c95f04a249e2c9da3c5221f50c85b2
URL: https://github.com/llvm/llvm-project/commit/fc3b7874b6c95f04a249e2c9da3c5221f50c85b2
DIFF: https://github.com/llvm/llvm-project/commit/fc3b7874b6c95f04a249e2c9da3c5221f50c85b2.diff
LOG: [X86] Add SHA512 instructions.
For more details about this instruction, please refer to the latest ISE document: https://www.intel.com/content/www/us/en/develop/download/intel-architecture-instruction-set-extensions-programming-reference.html
Reviewed By: RKSimon, skan
Differential Revision: https://reviews.llvm.org/D155146
Added:
clang/lib/Headers/sha512intrin.h
clang/test/CodeGen/X86/sha512-builtins.c
llvm/test/CodeGen/X86/sha512-intrinsics.ll
llvm/test/MC/Disassembler/X86/sha512-32.txt
llvm/test/MC/Disassembler/X86/sha512-64.txt
llvm/test/MC/X86/sha512-32-att.s
llvm/test/MC/X86/sha512-32-intel.s
llvm/test/MC/X86/sha512-64-att.s
llvm/test/MC/X86/sha512-64-intel.s
Modified:
clang/docs/ReleaseNotes.rst
clang/include/clang/Basic/BuiltinsX86.def
clang/include/clang/Driver/Options.td
clang/lib/Basic/Targets/X86.cpp
clang/lib/Basic/Targets/X86.h
clang/lib/Headers/CMakeLists.txt
clang/lib/Headers/immintrin.h
clang/test/CodeGen/attr-target-x86.c
clang/test/Driver/x86-target-features.c
clang/test/Preprocessor/x86_target_features.c
llvm/docs/ReleaseNotes.rst
llvm/include/llvm/IR/IntrinsicsX86.td
llvm/include/llvm/TargetParser/X86TargetParser.def
llvm/lib/Target/X86/X86.td
llvm/lib/Target/X86/X86InstrInfo.td
llvm/lib/Target/X86/X86InstrSSE.td
llvm/lib/TargetParser/Host.cpp
llvm/lib/TargetParser/X86TargetParser.cpp
Removed:
################################################################################
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index d09bffb0e25062..21d8e34a25e804 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -813,6 +813,10 @@ X86 Support
- Add ISA of ``AMX-COMPLEX`` which supports ``tcmmimfp16ps`` and
``tcmmrlfp16ps``.
+- Support ISA of ``SHA512``.
+ * Support intrinsic of ``_mm256_sha512msg1_epi64``.
+ * Support intrinsic of ``_mm256_sha512msg2_epi64``.
+ * Support intrinsic of ``_mm256_sha512rnds2_epi64``.
Arm and AArch64 Support
^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index 122896b417c845..fd4f769994a999 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -2132,6 +2132,11 @@ TARGET_BUILTIN(__builtin_ia32_vcvtneoph2ps256, "V8fV16xC*", "nV:256:", "avxnecon
TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16128, "V8yV4f", "nV:128:", "avx512bf16,avx512vl|avxneconvert")
TARGET_BUILTIN(__builtin_ia32_vcvtneps2bf16256, "V8yV8f", "nV:256:", "avx512bf16,avx512vl|avxneconvert")
+// SHA512
+TARGET_BUILTIN(__builtin_ia32_vsha512msg1, "V4ULLiV4ULLiV2ULLi", "nV:256:", "sha512")
+TARGET_BUILTIN(__builtin_ia32_vsha512msg2, "V4ULLiV4ULLiV4ULLi", "nV:256:", "sha512")
+TARGET_BUILTIN(__builtin_ia32_vsha512rnds2, "V4ULLiV4ULLiV4ULLiV2ULLi", "nV:256:", "sha512")
+
TARGET_HEADER_BUILTIN(_InterlockedAnd64, "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_InterlockedDecrement64, "WiWiD*", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
TARGET_HEADER_BUILTIN(_InterlockedExchange64, "WiWiD*Wi", "nh", INTRIN_H, ALL_MS_LANGUAGES, "")
diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td
index 8ffb9388d330f0..df5c091cf12db1 100644
--- a/clang/include/clang/Driver/Options.td
+++ b/clang/include/clang/Driver/Options.td
@@ -5056,6 +5056,8 @@ def msgx : Flag<["-"], "msgx">, Group<m_x86_Features_Group>;
def mno_sgx : Flag<["-"], "mno-sgx">, Group<m_x86_Features_Group>;
def msha : Flag<["-"], "msha">, Group<m_x86_Features_Group>;
def mno_sha : Flag<["-"], "mno-sha">, Group<m_x86_Features_Group>;
+def msha512 : Flag<["-"], "msha512">, Group<m_x86_Features_Group>;
+def mno_sha512 : Flag<["-"], "mno-sha512">, Group<m_x86_Features_Group>;
def mtbm : Flag<["-"], "mtbm">, Group<m_x86_Features_Group>;
def mno_tbm : Flag<["-"], "mno-tbm">, Group<m_x86_Features_Group>;
def mtsxldtrk : Flag<["-"], "mtsxldtrk">, Group<m_x86_Features_Group>;
diff --git a/clang/lib/Basic/Targets/X86.cpp b/clang/lib/Basic/Targets/X86.cpp
index 08d2722a8e52c0..b09ec21f77dbed 100644
--- a/clang/lib/Basic/Targets/X86.cpp
+++ b/clang/lib/Basic/Targets/X86.cpp
@@ -261,6 +261,8 @@ bool X86TargetInfo::handleTargetFeatures(std::vector<std::string> &Features,
HasAVX512VP2INTERSECT = true;
} else if (Feature == "+sha") {
HasSHA = true;
+ } else if (Feature == "+sha512") {
+ HasSHA512 = true;
} else if (Feature == "+shstk") {
HasSHSTK = true;
} else if (Feature == "+movbe") {
@@ -749,6 +751,8 @@ void X86TargetInfo::getTargetDefines(const LangOptions &Opts,
Builder.defineMacro("__AVX512VP2INTERSECT__");
if (HasSHA)
Builder.defineMacro("__SHA__");
+ if (HasSHA512)
+ Builder.defineMacro("__SHA512__");
if (HasFXSR)
Builder.defineMacro("__FXSR__");
@@ -999,6 +1003,7 @@ bool X86TargetInfo::isValidFeatureName(StringRef Name) const {
.Case("serialize", true)
.Case("sgx", true)
.Case("sha", true)
+ .Case("sha512", true)
.Case("shstk", true)
.Case("sse", true)
.Case("sse2", true)
@@ -1104,6 +1109,7 @@ bool X86TargetInfo::hasFeature(StringRef Feature) const {
.Case("serialize", HasSERIALIZE)
.Case("sgx", HasSGX)
.Case("sha", HasSHA)
+ .Case("sha512", HasSHA512)
.Case("shstk", HasSHSTK)
.Case("sse", SSELevel >= SSE1)
.Case("sse2", SSELevel >= SSE2)
diff --git a/clang/lib/Basic/Targets/X86.h b/clang/lib/Basic/Targets/X86.h
index 3da484fb9e874f..e7bc2d7ef3f61a 100644
--- a/clang/lib/Basic/Targets/X86.h
+++ b/clang/lib/Basic/Targets/X86.h
@@ -112,6 +112,7 @@ class LLVM_LIBRARY_VISIBILITY X86TargetInfo : public TargetInfo {
bool HasAVX512IFMA = false;
bool HasAVX512VP2INTERSECT = false;
bool HasSHA = false;
+ bool HasSHA512 = false;
bool HasSHSTK = false;
bool HasSGX = false;
bool HasCX8 = false;
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 5a219e234452a9..44fb2bb899264c 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -203,6 +203,7 @@ set(x86_files
rtmintrin.h
serializeintrin.h
sgxintrin.h
+ sha512intrin.h
shaintrin.h
smmintrin.h
tbmintrin.h
diff --git a/clang/lib/Headers/immintrin.h b/clang/lib/Headers/immintrin.h
index c5f84ae0286b73..e8dcdc95b22bad 100644
--- a/clang/lib/Headers/immintrin.h
+++ b/clang/lib/Headers/immintrin.h
@@ -269,6 +269,11 @@
#include <avxneconvertintrin.h>
#endif
+#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
+ defined(__SHA512__)
+#include <sha512intrin.h>
+#endif
+
#if !(defined(_MSC_VER) || defined(__SCE__)) || __has_feature(modules) || \
defined(__RDPID__)
/// Returns the value of the IA32_TSC_AUX MSR (0xc0000103).
diff --git a/clang/lib/Headers/sha512intrin.h b/clang/lib/Headers/sha512intrin.h
new file mode 100644
index 00000000000000..065ef5dac25aad
--- /dev/null
+++ b/clang/lib/Headers/sha512intrin.h
@@ -0,0 +1,200 @@
+/*===--------------- sha512intrin.h - SHA512 intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __IMMINTRIN_H
+#error "Never use <sha512intrin.h> directly; include <immintrin.h> instead."
+#endif // __IMMINTRIN_H
+
+#ifndef __SHA512INTRIN_H
+#define __SHA512INTRIN_H
+
+#define __DEFAULT_FN_ATTRS256 \
+ __attribute__((__always_inline__, __nodebug__, __target__("sha512"), \
+ __min_vector_width__(256)))
+
+/// This intrinisc is one of the two SHA512 message scheduling instructions.
+/// The intrinsic performs an intermediate calculation for the next four
+/// SHA512 message qwords. The calculated results are stored in \a dst.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m256i _mm256_sha512msg1_epi64(__m256i __A, __m128i __B)
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VSHA512MSG1 instruction.
+///
+/// \param __A
+/// A 256-bit vector of [4 x long long].
+/// \param __B
+/// A 128-bit vector of [2 x long long].
+/// \returns
+/// A 256-bit vector of [4 x long long].
+///
+/// \code{.operation}
+/// DEFINE ROR64(qword, n) {
+/// count := n % 64
+/// dest := (qword >> count) | (qword << (64 - count))
+/// RETURN dest
+/// }
+/// DEFINE SHR64(qword, n) {
+/// RETURN qword >> n
+/// }
+/// DEFINE s0(qword):
+/// RETURN ROR64(qword,1) ^ ROR64(qword, 8) ^ SHR64(qword, 7)
+/// }
+/// W[4] := __B.qword[0]
+/// W[3] := __A.qword[3]
+/// W[2] := __A.qword[2]
+/// W[1] := __A.qword[1]
+/// W[0] := __A.qword[0]
+/// dst.qword[3] := W[3] + s0(W[4])
+/// dst.qword[2] := W[2] + s0(W[3])
+/// dst.qword[1] := W[1] + s0(W[2])
+/// dst.qword[0] := W[0] + s0(W[1])
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
+ return (__m256i)__builtin_ia32_vsha512msg1((__v4du)__A, (__v2du)__B);
+}
+
+/// This intrinisc is one of the two SHA512 message scheduling instructions.
+/// The intrinsic performs the final calculation for the next four SHA512
+/// message qwords. The calculated results are stored in \a dst.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m256i _mm256_sha512msg2_epi64(__m256i __A, __m256i __B)
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VSHA512MSG2 instruction.
+///
+/// \param __A
+/// A 256-bit vector of [4 x long long].
+/// \param __B
+/// A 256-bit vector of [4 x long long].
+/// \returns
+/// A 256-bit vector of [4 x long long].
+///
+/// \code{.operation}
+/// DEFINE ROR64(qword, n) {
+/// count := n % 64
+/// dest := (qword >> count) | (qword << (64 - count))
+/// RETURN dest
+/// }
+/// DEFINE SHR64(qword, n) {
+/// RETURN qword >> n
+/// }
+/// DEFINE s1(qword) {
+/// RETURN ROR64(qword,19) ^ ROR64(qword, 61) ^ SHR64(qword, 6)
+/// }
+/// W[14] := __B.qword[2]
+/// W[15] := __B.qword[3]
+/// W[16] := __A.qword[0] + s1(W[14])
+/// W[17] := __A.qword[1] + s1(W[15])
+/// W[18] := __A.qword[2] + s1(W[16])
+/// W[19] := __A.qword[3] + s1(W[17])
+/// dst.qword[3] := W[19]
+/// dst.qword[2] := W[18]
+/// dst.qword[1] := W[17]
+/// dst.qword[0] := W[16]
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
+ return (__m256i)__builtin_ia32_vsha512msg2((__v4du)__A, (__v4du)__B);
+}
+
+/// This intrinisc performs two rounds of SHA512 operation using initial SHA512
+/// state (C,D,G,H) from \a __A, an initial SHA512 state (A,B,E,F) from
+/// \a __A, and a pre-computed sum of the next two round message qwords and
+/// the corresponding round constants from \a __C (only the two lower qwords
+/// of the third operand). The updated SHA512 state (A,B,E,F) is written to
+/// \a __A, and \a __A can be used as the updated state (C,D,G,H) in later
+/// rounds.
+///
+/// \headerfile <immintrin.h>
+///
+/// \code
+/// __m256i _mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C)
+/// \endcode
+///
+/// This intrinsic corresponds to the \c VSHA512RNDS2 instruction.
+///
+/// \param __A
+/// A 256-bit vector of [4 x long long].
+/// \param __B
+/// A 256-bit vector of [4 x long long].
+/// \param __C
+/// A 128-bit vector of [2 x long long].
+/// \returns
+/// A 256-bit vector of [4 x long long].
+///
+/// \code{.operation}
+/// DEFINE ROR64(qword, n) {
+/// count := n % 64
+/// dest := (qword >> count) | (qword << (64 - count))
+/// RETURN dest
+/// }
+/// DEFINE SHR64(qword, n) {
+/// RETURN qword >> n
+/// }
+/// DEFINE cap_sigma0(qword) {
+/// RETURN ROR64(qword,28) ^ ROR64(qword, 34) ^ ROR64(qword, 39)
+/// }
+/// DEFINE cap_sigma1(qword) {
+/// RETURN ROR64(qword,14) ^ ROR64(qword, 18) ^ ROR64(qword, 41)
+/// }
+/// DEFINE MAJ(a,b,c) {
+/// RETURN (a & b) ^ (a & c) ^ (b & c)
+/// }
+/// DEFINE CH(e,f,g) {
+/// RETURN (e & f) ^ (g & ~e)
+/// }
+/// A[0] := __B.qword[3]
+/// B[0] := __B.qword[2]
+/// C[0] := __C.qword[3]
+/// D[0] := __C.qword[2]
+/// E[0] := __B.qword[1]
+/// F[0] := __B.qword[0]
+/// G[0] := __C.qword[1]
+/// H[0] := __C.qword[0]
+/// WK[0]:= __A.qword[0]
+/// WK[1]:= __A.qword[1]
+/// FOR i := 0 to 1:
+/// A[i+1] := CH(E[i], F[i], G[i]) +
+/// cap_sigma1(E[i]) + WK[i] + H[i] +
+/// MAJ(A[i], B[i], C[i]) +
+/// cap_sigma0(A[i])
+/// B[i+1] := A[i]
+/// C[i+1] := B[i]
+/// D[i+1] := C[i]
+/// E[i+1] := CH(E[i], F[i], G[i]) +
+/// cap_sigma1(E[i]) + WK[i] + H[i] + D[i]
+/// F[i+1] := E[i]
+/// G[i+1] := F[i]
+/// H[i+1] := G[i]
+/// ENDFOR
+/// dst.qword[3] := A[2]
+/// dst.qword[2] := B[2]
+/// dst.qword[1] := E[2]
+/// dst.qword[0] := F[2]
+/// dst[MAX:256] := 0
+/// \endcode
+static __inline__ __m256i __DEFAULT_FN_ATTRS256
+_mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
+ return (__m256i)__builtin_ia32_vsha512rnds2((__v4du)__A, (__v4du)__B,
+ (__v2du)__C);
+}
+
+#undef __DEFAULT_FN_ATTRS256
+
+#endif // __SHA512INTRIN_H
diff --git a/clang/test/CodeGen/X86/sha512-builtins.c b/clang/test/CodeGen/X86/sha512-builtins.c
new file mode 100644
index 00000000000000..04b719e5d50e9d
--- /dev/null
+++ b/clang/test/CodeGen/X86/sha512-builtins.c
@@ -0,0 +1,22 @@
+// RUN: %clang_cc1 %s -ffreestanding -triple=x86_64-unknown-unknown -target-feature +sha512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+// RUN: %clang_cc1 %s -ffreestanding -triple=i386-unknown-unknown -target-feature +sha512 -emit-llvm -o - -Wall -Werror | FileCheck %s
+
+#include <immintrin.h>
+
+__m256i test_mm256_sha512msg1_epi64(__m256i __A, __m128i __B) {
+ // CHECK-LABEL: @test_mm256_sha512msg1_epi64(
+ // CHECK: call <4 x i64> @llvm.x86.vsha512msg1(<4 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ return _mm256_sha512msg1_epi64(__A, __B);
+}
+
+__m256i test_mm256_sha512msg2_epi64(__m256i __A, __m256i __B) {
+ // CHECK-LABEL: @test_mm256_sha512msg2_epi64(
+ // CHECK: call <4 x i64> @llvm.x86.vsha512msg2(<4 x i64> %{{.*}}, <4 x i64> %{{.*}})
+ return _mm256_sha512msg2_epi64(__A, __B);
+}
+
+__m256i test_mm256_sha512rnds2_epi64(__m256i __A, __m256i __B, __m128i __C) {
+ // CHECK-LABEL: @test_mm256_sha512rnds2_epi64(
+ // CHECK: call <4 x i64> @llvm.x86.vsha512rnds2(<4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <2 x i64> %{{.*}})
+ return _mm256_sha512rnds2_epi64(__A, __B, __C);
+}
diff --git a/clang/test/CodeGen/attr-target-x86.c b/clang/test/CodeGen/attr-target-x86.c
index f22203f3ceadc5..d4a1732aad041c 100644
--- a/clang/test/CodeGen/attr-target-x86.c
+++ b/clang/test/CodeGen/attr-target-x86.c
@@ -54,9 +54,9 @@ void __attribute__((target("arch=x86-64-v4"))) x86_64_v4(void) {}
// CHECK: #0 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87" "tune-cpu"="i686"
// CHECK: #1 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt"
// CHECK-NOT: tune-cpu
-// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
+// CHECK: #2 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-aes,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-gfni,-kl,-pclmul,-sha,-sha512,-sse2,-sse3,-sse4.1,-sse4.2,-sse4a,-ssse3,-vaes,-vpclmulqdq,-widekl,-xop" "tune-cpu"="i686"
// CHECK: #3 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+crc32,+cx8,+mmx,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87" "tune-cpu"="i686"
-// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
+// CHECK: #4 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-avx,-avx2,-avx512bf16,-avx512bitalg,-avx512bw,-avx512cd,-avx512dq,-avx512er,-avx512f,-avx512fp16,-avx512ifma,-avx512pf,-avx512vbmi,-avx512vbmi2,-avx512vl,-avx512vnni,-avx512vp2intersect,-avx512vpopcntdq,-avxifma,-avxneconvert,-avxvnni,-avxvnniint8,-f16c,-fma,-fma4,-sha512,-sse4.1,-sse4.2,-vaes,-vpclmulqdq,-xop" "tune-cpu"="i686"
// CHECK: #5 = {{.*}}"target-cpu"="ivybridge" "target-features"="+avx,+cmov,+crc32,+cx16,+cx8,+f16c,+fsgsbase,+fxsr,+mmx,+pclmul,+popcnt,+rdrnd,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt,-aes,-vaes"
// CHECK-NOT: tune-cpu
// CHECK: #6 = {{.*}}"target-cpu"="i686" "target-features"="+cmov,+cx8,+x87,-3dnow,-3dnowa,-mmx"
diff --git a/clang/test/Driver/x86-target-features.c b/clang/test/Driver/x86-target-features.c
index 71bdd2a9c29817..b811e23ced4b7f 100644
--- a/clang/test/Driver/x86-target-features.c
+++ b/clang/test/Driver/x86-target-features.c
@@ -349,6 +349,11 @@
// AVXNECONVERT: "-target-feature" "+avxneconvert"
// NO-AVXNECONVERT: "-target-feature" "-avxneconvert"
+// RUN: %clang --target=i386 -msha512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=SHA512 %s
+// RUN: %clang --target=i386 -mno-sha512 %s -### -o %t.o 2>&1 | FileCheck -check-prefix=NO-SHA512 %s
+// SHA512: "-target-feature" "+sha512"
+// NO-SHA512: "-target-feature" "-sha512"
+
// RUN: %clang --target=i386 -march=i386 -mcrc32 %s -### 2>&1 | FileCheck -check-prefix=CRC32 %s
// RUN: %clang --target=i386 -march=i386 -mno-crc32 %s -### 2>&1 | FileCheck -check-prefix=NO-CRC32 %s
// CRC32: "-target-feature" "+crc32"
diff --git a/clang/test/Preprocessor/x86_target_features.c b/clang/test/Preprocessor/x86_target_features.c
index 4481351b4e7916..0972997342678d 100644
--- a/clang/test/Preprocessor/x86_target_features.c
+++ b/clang/test/Preprocessor/x86_target_features.c
@@ -660,6 +660,19 @@
// AVXNECONVERTNOAVX2-NOT: #define __AVX2__ 1
// AVXNECONVERTNOAVX2-NOT: #define __AVXNECONVERT__ 1
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -msha512 -x c -E -dM -o - %s | FileCheck -check-prefix=SHA512 %s
+
+// SHA512: #define __AVX__ 1
+// SHA512: #define __SHA512__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -mno-sha512 -x c -E -dM -o - %s | FileCheck -check-prefix=NOSHA512 %s
+// NOSHA512-NOT: #define __SHA512__ 1
+
+// RUN: %clang -target i386-unknown-linux-gnu -march=atom -msha512 -mno-avx -x c -E -dM -o - %s | FileCheck -check-prefix=SHA512NOAVX %s
+
+// SHA512NOAVX-NOT: #define __AVX__ 1
+// SHA512NOAVX-NOT: #define __SHA512__ 1
+
// RUN: %clang -target i386-unknown-linux-gnu -march=i386 -mcrc32 -x c -E -dM -o - %s | FileCheck -check-prefix=CRC32 %s
// CRC32: #define __CRC32__ 1
diff --git a/llvm/docs/ReleaseNotes.rst b/llvm/docs/ReleaseNotes.rst
index 918453afc16045..74a9eb1eb73a2f 100644
--- a/llvm/docs/ReleaseNotes.rst
+++ b/llvm/docs/ReleaseNotes.rst
@@ -279,7 +279,7 @@ Changes to the X86 Backend
* ``__builtin_unpredictable`` (unpredictable metadata in LLVM IR), is handled by X86 Backend.
``X86CmovConversion`` pass now respects this builtin and does not convert CMOVs to branches.
* Add support for the ``PBNDKB`` instruction.
-
+* Support ISA of ``SHA512``.
Changes to the OCaml bindings
-----------------------------
diff --git a/llvm/include/llvm/IR/IntrinsicsX86.td b/llvm/include/llvm/IR/IntrinsicsX86.td
index ed10a84835ac13..b60675a85910a9 100644
--- a/llvm/include/llvm/IR/IntrinsicsX86.td
+++ b/llvm/include/llvm/IR/IntrinsicsX86.td
@@ -5105,6 +5105,20 @@ let TargetPrefix = "x86" in {
[IntrNoMem]>;
}
+//===----------------------------------------------------------------------===//
+// SHA512 intrinsics
+let TargetPrefix = "x86" in {
+def int_x86_vsha512msg1 : ClangBuiltin<"__builtin_ia32_vsha512msg1">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+def int_x86_vsha512msg2 : ClangBuiltin<"__builtin_ia32_vsha512msg2">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty],
+ [IntrNoMem]>;
+def int_x86_vsha512rnds2 : ClangBuiltin<"__builtin_ia32_vsha512rnds2">,
+ DefaultAttrsIntrinsic<[llvm_v4i64_ty], [llvm_v4i64_ty, llvm_v4i64_ty, llvm_v2i64_ty],
+ [IntrNoMem]>;
+}
+
//===----------------------------------------------------------------------===//
// Thread synchronization ops with timer.
let TargetPrefix = "x86" in {
diff --git a/llvm/include/llvm/TargetParser/X86TargetParser.def b/llvm/include/llvm/TargetParser/X86TargetParser.def
index a2fb6b5c3510ee..0e3008bf6ca128 100644
--- a/llvm/include/llvm/TargetParser/X86TargetParser.def
+++ b/llvm/include/llvm/TargetParser/X86TargetParser.def
@@ -227,6 +227,7 @@ X86_FEATURE (AVXNECONVERT, "avxneconvert")
X86_FEATURE (AVXVNNI, "avxvnni")
X86_FEATURE (AVXIFMA, "avxifma")
X86_FEATURE (AVXVNNIINT8, "avxvnniint8")
+X86_FEATURE (SHA512, "sha512")
// These features aren't really CPU features, but the frontend can set them.
X86_FEATURE (RETPOLINE_EXTERNAL_THUNK, "retpoline-external-thunk")
X86_FEATURE (RETPOLINE_INDIRECT_BRANCHES, "retpoline-indirect-branches")
diff --git a/llvm/lib/Target/X86/X86.td b/llvm/lib/Target/X86/X86.td
index e966bbece96b84..5abbcea3f8681a 100644
--- a/llvm/lib/Target/X86/X86.td
+++ b/llvm/lib/Target/X86/X86.td
@@ -238,6 +238,9 @@ def FeatureADX : SubtargetFeature<"adx", "HasADX", "true",
def FeatureSHA : SubtargetFeature<"sha", "HasSHA", "true",
"Enable SHA instructions",
[FeatureSSE2]>;
+def FeatureSHA512 : SubtargetFeature<"sha512", "HasSHA512", "true",
+ "Support SHA512 instructions",
+ [FeatureAVX]>;
// Processor supports CET SHSTK - Control-Flow Enforcement Technology
// using Shadow Stack
def FeatureSHSTK : SubtargetFeature<"shstk", "HasSHSTK", "true",
@@ -1066,6 +1069,7 @@ def ProcessorFeatures {
// Graniterapids
list<SubtargetFeature> GNRAdditionalFeatures = [FeatureAMXFP16,
FeaturePREFETCHI,
+ FeatureSHA512,
FeatureAMXCOMPLEX];
list<SubtargetFeature> GNRFeatures =
!listconcat(SPRFeatures, GNRAdditionalFeatures);
diff --git a/llvm/lib/Target/X86/X86InstrInfo.td b/llvm/lib/Target/X86/X86InstrInfo.td
index 938c2be43b0618..5cd230f346f9d1 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.td
+++ b/llvm/lib/Target/X86/X86InstrInfo.td
@@ -967,6 +967,7 @@ def NoVLX_Or_NoIFMA : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasIFMA()">
def HasRTM : Predicate<"Subtarget->hasRTM()">;
def HasADX : Predicate<"Subtarget->hasADX()">;
def HasSHA : Predicate<"Subtarget->hasSHA()">;
+def HasSHA512 : Predicate<"Subtarget->hasSHA512()">;
def HasSGX : Predicate<"Subtarget->hasSGX()">;
def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">;
def HasSSEPrefetch : Predicate<"Subtarget->hasSSEPrefetch()">;
diff --git a/llvm/lib/Target/X86/X86InstrSSE.td b/llvm/lib/Target/X86/X86InstrSSE.td
index b31606b8f9b431..5ed77a1c16b382 100644
--- a/llvm/lib/Target/X86/X86InstrSSE.td
+++ b/llvm/lib/Target/X86/X86InstrSSE.td
@@ -8295,3 +8295,25 @@ def : InstAlias<"vcvtneps2bf16x\t{$src, $dst|$dst, $src}",
(VCVTNEPS2BF16rr VR128:$dst, VR128:$src), 0, "att">;
def : InstAlias<"vcvtneps2bf16y\t{$src, $dst|$dst, $src}",
(VCVTNEPS2BF16Yrr VR128:$dst, VR256:$src), 0, "att">;
+
+// FIXME: Is there a better scheduler class for SHA512 than WriteVecIMul?
+let Predicates = [HasSHA512], Constraints = "$src1 = $dst" in {
+def VSHA512MSG1rr : I<0xcc, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR128:$src2),
+ "vsha512msg1\t{$src2, $dst|$dst, $src2}",
+ [(set VR256:$dst,
+ (int_x86_vsha512msg1 VR256:$src1, VR128:$src2))]>, VEX_L,
+ VEX, T8XD, Sched<[WriteVecIMul]>;
+def VSHA512MSG2rr : I<0xcd, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2),
+ "vsha512msg2\t{$src2, $dst|$dst, $src2}",
+ [(set VR256:$dst,
+ (int_x86_vsha512msg2 VR256:$src1, VR256:$src2))]>, VEX_L,
+ VEX, T8XD, Sched<[WriteVecIMul]>;
+def VSHA512RNDS2rr : I<0xcb, MRMSrcReg, (outs VR256:$dst),
+ (ins VR256:$src1, VR256:$src2, VR128:$src3),
+ "vsha512rnds2\t{$src3, $src2, $dst|$dst, $src2, $src3}",
+ [(set VR256:$dst,
+ (int_x86_vsha512rnds2 VR256:$src1, VR256:$src2, VR128:$src3))]>,
+ VEX_L, VEX_4V, T8XD, Sched<[WriteVecIMul]>;
+}
diff --git a/llvm/lib/TargetParser/Host.cpp b/llvm/lib/TargetParser/Host.cpp
index 8b17118401cb47..378dac9c5f02c8 100644
--- a/llvm/lib/TargetParser/Host.cpp
+++ b/llvm/lib/TargetParser/Host.cpp
@@ -1746,6 +1746,7 @@ bool sys::getHostCPUFeatures(StringMap<bool> &Features) {
Features["amx-int8"] = HasLeaf7 && ((EDX >> 25) & 1) && HasAMXSave;
bool HasLeaf7Subleaf1 =
MaxLevel >= 7 && !getX86CpuIDAndInfoEx(0x7, 0x1, &EAX, &EBX, &ECX, &EDX);
+ Features["sha512"] = HasLeaf7Subleaf1 && ((EAX >> 0) & 1);
Features["raoint"] = HasLeaf7Subleaf1 && ((EAX >> 3) & 1);
Features["avxvnni"] = HasLeaf7Subleaf1 && ((EAX >> 4) & 1) && HasAVXSave;
Features["avx512bf16"] = HasLeaf7Subleaf1 && ((EAX >> 5) & 1) && HasAVX512Save;
diff --git a/llvm/lib/TargetParser/X86TargetParser.cpp b/llvm/lib/TargetParser/X86TargetParser.cpp
index fcf54ae1f745b7..39f03aa05c470d 100644
--- a/llvm/lib/TargetParser/X86TargetParser.cpp
+++ b/llvm/lib/TargetParser/X86TargetParser.cpp
@@ -655,6 +655,7 @@ constexpr FeatureBitset ImpliedFeaturesRAOINT = {};
constexpr FeatureBitset ImpliedFeaturesAVXVNNIINT8 = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXIFMA = FeatureAVX2;
constexpr FeatureBitset ImpliedFeaturesAVXNECONVERT = FeatureAVX2;
+constexpr FeatureBitset ImpliedFeaturesSHA512 = FeatureAVX;
constexpr FeatureBitset ImpliedFeaturesAVX512FP16 =
FeatureAVX512BW | FeatureAVX512DQ | FeatureAVX512VL;
// Key Locker Features
diff --git a/llvm/test/CodeGen/X86/sha512-intrinsics.ll b/llvm/test/CodeGen/X86/sha512-intrinsics.ll
new file mode 100644
index 00000000000000..bd2e4559f09b5e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/sha512-intrinsics.ll
@@ -0,0 +1,33 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -verify-machineinstrs -mtriple=x86_64-unknown-unknown --show-mc-encoding -mattr=+sha512 | FileCheck %s
+; RUN: llc < %s -verify-machineinstrs -mtriple=i686-unknown-unknown --show-mc-encoding -mattr=+sha512 | FileCheck %s
+
+define <4 x i64> @test_int_x86_vsha512msg1(<4 x i64> %A, <2 x i64> %B) {
+; CHECK-LABEL: test_int_x86_vsha512msg1:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsha512msg1 %xmm1, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xcc,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i64> @llvm.x86.vsha512msg1(<4 x i64> %A, <2 x i64> %B)
+ ret <4 x i64> %ret
+}
+declare <4 x i64> @llvm.x86.vsha512msg1(<4 x i64> %A, <2 x i64> %B)
+
+define <4 x i64> @test_int_x86_vsha512msg2(<4 x i64> %A, <4 x i64> %B) {
+; CHECK-LABEL: test_int_x86_vsha512msg2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsha512msg2 %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x7f,0xcd,0xc1]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i64> @llvm.x86.vsha512msg2(<4 x i64> %A, <4 x i64> %B)
+ ret <4 x i64> %ret
+}
+declare <4 x i64> @llvm.x86.vsha512msg2(<4 x i64> %A, <4 x i64> %B)
+
+define <4 x i64> @test_int_x86_vsha512rnds2(<4 x i64> %A, <4 x i64> %B, <2 x i64> %C) {
+; CHECK-LABEL: test_int_x86_vsha512rnds2:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsha512rnds2 %xmm2, %ymm1, %ymm0 # encoding: [0xc4,0xe2,0x77,0xcb,0xc2]
+; CHECK-NEXT: ret{{[l|q]}} # encoding: [0xc3]
+ %ret = call <4 x i64> @llvm.x86.vsha512rnds2(<4 x i64> %A, <4 x i64> %B, <2 x i64> %C)
+ ret <4 x i64> %ret
+}
+declare <4 x i64> @llvm.x86.vsha512rnds2(<4 x i64> %A, <4 x i64> %B, <2 x i64> %C)
diff --git a/llvm/test/MC/Disassembler/X86/sha512-32.txt b/llvm/test/MC/Disassembler/X86/sha512-32.txt
new file mode 100644
index 00000000000000..a3b16fd18285bf
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sha512-32.txt
@@ -0,0 +1,15 @@
+# RUN: llvm-mc --disassemble %s -triple=i386 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=i386 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vsha512msg1 %xmm3, %ymm2
+# INTEL: vsha512msg1 ymm2, xmm3
+0xc4,0xe2,0x7f,0xcc,0xd3
+
+# ATT: vsha512msg2 %ymm3, %ymm2
+# INTEL: vsha512msg2 ymm2, ymm3
+0xc4,0xe2,0x7f,0xcd,0xd3
+
+# ATT: vsha512rnds2 %xmm4, %ymm3, %ymm2
+# INTEL: vsha512rnds2 ymm2, ymm3, xmm4
+0xc4,0xe2,0x67,0xcb,0xd4
+
diff --git a/llvm/test/MC/Disassembler/X86/sha512-64.txt b/llvm/test/MC/Disassembler/X86/sha512-64.txt
new file mode 100644
index 00000000000000..251585ec8802f0
--- /dev/null
+++ b/llvm/test/MC/Disassembler/X86/sha512-64.txt
@@ -0,0 +1,15 @@
+# RUN: llvm-mc --disassemble %s -triple=x86_64 | FileCheck %s --check-prefixes=ATT
+# RUN: llvm-mc --disassemble %s -triple=x86_64 --output-asm-variant=1 | FileCheck %s --check-prefixes=INTEL
+
+# ATT: vsha512msg1 %xmm3, %ymm12
+# INTEL: vsha512msg1 ymm12, xmm3
+0xc4,0x62,0x7f,0xcc,0xe3
+
+# ATT: vsha512msg2 %ymm3, %ymm12
+# INTEL: vsha512msg2 ymm12, ymm3
+0xc4,0x62,0x7f,0xcd,0xe3
+
+# ATT: vsha512rnds2 %xmm4, %ymm3, %ymm12
+# INTEL: vsha512rnds2 ymm12, ymm3, xmm4
+0xc4,0x62,0x67,0xcb,0xe4
+
diff --git a/llvm/test/MC/X86/sha512-32-att.s b/llvm/test/MC/X86/sha512-32-att.s
new file mode 100644
index 00000000000000..1f1247282064cd
--- /dev/null
+++ b/llvm/test/MC/X86/sha512-32-att.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple i686 --show-encoding %s | FileCheck %s
+
+// CHECK: vsha512msg1 %xmm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xcc,0xd3]
+ vsha512msg1 %xmm3, %ymm2
+
+// CHECK: vsha512msg2 %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xcd,0xd3]
+ vsha512msg2 %ymm3, %ymm2
+
+// CHECK: vsha512rnds2 %xmm4, %ymm3, %ymm2
+// CHECK: encoding: [0xc4,0xe2,0x67,0xcb,0xd4]
+ vsha512rnds2 %xmm4, %ymm3, %ymm2
diff --git a/llvm/test/MC/X86/sha512-32-intel.s b/llvm/test/MC/X86/sha512-32-intel.s
new file mode 100644
index 00000000000000..19cdff5d59be9a
--- /dev/null
+++ b/llvm/test/MC/X86/sha512-32-intel.s
@@ -0,0 +1,13 @@
+// RUN: llvm-mc -triple i686 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vsha512msg1 ymm2, xmm3
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xcc,0xd3]
+ vsha512msg1 ymm2, xmm3
+
+// CHECK: vsha512msg2 ymm2, ymm3
+// CHECK: encoding: [0xc4,0xe2,0x7f,0xcd,0xd3]
+ vsha512msg2 ymm2, ymm3
+
+// CHECK: vsha512rnds2 ymm2, ymm3, xmm4
+// CHECK: encoding: [0xc4,0xe2,0x67,0xcb,0xd4]
+ vsha512rnds2 ymm2, ymm3, xmm4
diff --git a/llvm/test/MC/X86/sha512-64-att.s b/llvm/test/MC/X86/sha512-64-att.s
new file mode 100644
index 00000000000000..0b82f70dfc057c
--- /dev/null
+++ b/llvm/test/MC/X86/sha512-64-att.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64 --show-encoding %s | FileCheck %s
+
+// CHECK: vsha512msg1 %xmm3, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7f,0xcc,0xe3]
+ vsha512msg1 %xmm3, %ymm12
+
+// CHECK: vsha512msg2 %ymm3, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x7f,0xcd,0xe3]
+ vsha512msg2 %ymm3, %ymm12
+
+// CHECK: vsha512rnds2 %xmm4, %ymm3, %ymm12
+// CHECK: encoding: [0xc4,0x62,0x67,0xcb,0xe4]
+ vsha512rnds2 %xmm4, %ymm3, %ymm12
+
diff --git a/llvm/test/MC/X86/sha512-64-intel.s b/llvm/test/MC/X86/sha512-64-intel.s
new file mode 100644
index 00000000000000..243d0e94e0141a
--- /dev/null
+++ b/llvm/test/MC/X86/sha512-64-intel.s
@@ -0,0 +1,14 @@
+// RUN: llvm-mc -triple x86_64 -x86-asm-syntax=intel -output-asm-variant=1 --show-encoding %s | FileCheck %s
+
+// CHECK: vsha512msg1 ymm12, xmm3
+// CHECK: encoding: [0xc4,0x62,0x7f,0xcc,0xe3]
+ vsha512msg1 ymm12, xmm3
+
+// CHECK: vsha512msg2 ymm12, ymm3
+// CHECK: encoding: [0xc4,0x62,0x7f,0xcd,0xe3]
+ vsha512msg2 ymm12, ymm3
+
+// CHECK: vsha512rnds2 ymm12, ymm3, xmm4
+// CHECK: encoding: [0xc4,0x62,0x67,0xcb,0xe4]
+ vsha512rnds2 ymm12, ymm3, xmm4
+
More information about the cfe-commits
mailing list