[clang] [llvm] [X86][AVX10.2] Support YMM rounding new instructions (PR #101825)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 3 07:07:12 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-x86
@llvm/pr-subscribers-clang-codegen
Author: Phoebe Wang (phoebewang)
<details>
<summary>Changes</summary>
Ref.: https://cdrdv2.intel.com/v1/dl/getContent/828965
---
Patch is 767.98 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/101825.diff
17 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsX86.def (+120)
- (modified) clang/lib/CodeGen/CGBuiltin.cpp (+120)
- (modified) clang/lib/Headers/avx10_2niintrin.h (+1623)
- (modified) clang/lib/Sema/SemaX86.cpp (+136)
- (modified) clang/test/CodeGen/X86/avx10_2ni-builtins.c (+2344)
- (modified) llvm/include/llvm/IR/IntrinsicsX86.td (+436)
- (modified) llvm/lib/Target/X86/Disassembler/X86Disassembler.cpp (+1-1)
- (modified) llvm/lib/Target/X86/X86InstrAVX10.td (+275-1)
- (modified) llvm/lib/Target/X86/X86InstrAVX512.td (+4-4)
- (modified) llvm/lib/Target/X86/X86InstrFMA3Info.cpp (+6-3)
- (modified) llvm/lib/Target/X86/X86IntrinsicsInfo.h (+176)
- (modified) llvm/test/CodeGen/X86/avx10_2ni-intrinsics.ll (+4253)
- (modified) llvm/test/MC/Disassembler/X86/avx10_2ni-32.txt (+1740)
- (modified) llvm/test/MC/Disassembler/X86/avx10_2ni-64.txt (+1740)
- (modified) llvm/test/MC/X86/avx10_2ni-32-intel.s (+1740)
- (modified) llvm/test/MC/X86/avx10_2ni-64-att.s (+1740)
- (modified) llvm/utils/TableGen/X86DisassemblerTables.cpp (+3-3)
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsX86.def b/clang/include/clang/Basic/BuiltinsX86.def
index f028711a807c0..b117c6d6d9340 100644
--- a/clang/include/clang/Basic/BuiltinsX86.def
+++ b/clang/include/clang/Basic/BuiltinsX86.def
@@ -1966,6 +1966,126 @@ TARGET_BUILTIN(__builtin_ia32_mpsadbw512, "V32sV64cV64cIc", "ncV:512:", "avx10.2
TARGET_BUILTIN(__builtin_ia32_vaddpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vaddph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
TARGET_BUILTIN(__builtin_ia32_vaddps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmppd256_round_mask, "UcV4dV4dIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmpph256_round_mask, "UsV16xV16xIiUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcmpps256_round_mask, "UcV8fV8fIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ph256_round_mask, "V8xV8iV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtdq2ps256_round_mask, "V8fV8iV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2dq256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ph256_round_mask, "V8xV4dV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2ps256_round_mask, "V4fV4dV4fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2qq256_round_mask, "V4LLiV4dV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2udq256_round_mask, "V4UiV4dV4UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtpd2uqq256_round_mask, "V4ULLiV4dV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2dq256_round_mask, "V8iV8xV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2pd256_round_mask, "V4dV8xV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2psx256_round_mask, "V8fV8xV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2qq256_round_mask, "V4LLiV8xV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2udq256_round_mask, "V8UiV8xV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uqq256_round_mask, "V4ULLiV8xV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2uw256_round_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtph2w256_round_mask, "V16sV16xV16sUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2dq256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2pd256_round_mask, "V4dV4fV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2phx256_round_mask, "V8xV8fV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2qq256_round_mask, "V4LLiV4fV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2udq256_round_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtps2uqq256_round_mask, "V4ULLiV4fV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2pd256_round_mask, "V4dV4LLiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ph256_round_mask, "V8xV4LLiV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtqq2ps256_round_mask, "V4fV4LLiV4fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2dq256_round_mask, "V4iV4dV4iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2qq256_round_mask, "V4LLiV4dV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2udq256_round_mask, "V4UiV4dV4UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttpd2uqq256_round_mask, "V4ULLiV4dV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2dq256_round_mask, "V8iV8xV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2qq256_round_mask, "V4LLiV8xV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2udq256_round_mask, "V8UiV8xV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uqq256_round_mask, "V4ULLiV8xV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2uw256_round_mask, "V16UsV16xV16UsUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttph2w256_round_mask, "V16sV16xV16sUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2dq256_round_mask, "V8iV8fV8iUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2qq256_round_mask, "V4LLiV4fV4LLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2udq256_round_mask, "V8UiV8fV8UiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvttps2uqq256_round_mask, "V4ULLiV4fV4ULLiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ph256_round_mask, "V8xV8UiV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtudq2ps256_round_mask, "V8fV8UiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2pd256_round_mask, "V4dV4ULLiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ph256_round_mask, "V8xV4ULLiV8xUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtuqq2ps256_round_mask, "V4fV4ULLiV4fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtuw2ph256_round_mask, "V16xV16UsV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vcvtw2ph256_round_mask, "V16xV16sV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vdivps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfcmaddcph256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfcmulcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfixupimmpd256_round_mask, "V4dV4dV4dV4LLiIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfixupimmpd256_round_maskz, "V4dV4dV4dV4LLiIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfixupimmps256_round_mask, "V8fV8fV8fV8iIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfixupimmps256_round_maskz, "V8fV8fV8fV8iIiUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_maskz, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_maskz, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddcph256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_maskz, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_maskz, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_maskz, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmaddsubps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddpd256_round_mask3, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddph256_round_mask3, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmsubaddps256_round_mask3, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vfmulcph256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexppd256_round_mask, "V4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexpph256_round_mask, "V16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetexpps256_round_mask, "V8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantpd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vgetmantps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmaxps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vminps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vmulps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrangepd256_round_mask, "V4dV4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrangeps256_round_mask, "V8fV8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreducepd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreduceph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vreduceps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscalepd256_round_mask, "V4dV4dIiV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscaleph256_round_mask, "V16xV16xIiV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vrndscaleps256_round_mask, "V8fV8fIiV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefpd256_round_mask, "V4dV4dV4dV4dUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefph256_round_mask, "V16xV16xV16xV16xUsIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vscalefps256_round_mask, "V8fV8fV8fV8fUcIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtpd256_round, "V4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtph256_round, "V16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsqrtps256_round, "V8fV8fIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubpd256_round, "V4dV4dV4dIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubph256_round, "V16xV16xV16xIi", "nV:256:", "avx10.2-256")
+TARGET_BUILTIN(__builtin_ia32_vsubps256_round, "V8fV8fV8fIi", "nV:256:", "avx10.2-256")
// AVX-VNNI-INT16
TARGET_BUILTIN(__builtin_ia32_vpdpwsud128, "V4iV4iV4iV4i", "nV:128:", "avxvnniint16")
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 13caab6c42111..51d1162c6e403 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -13938,6 +13938,54 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
break;
+ case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
+ IID = llvm::Intrinsic::x86_avx10_vfmaddph256;
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
+ IID = llvm::Intrinsic::x86_avx10_vfmaddsubph256;
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
+ IID = llvm::Intrinsic::x86_avx10_vfmaddps256;
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
+ IID = llvm::Intrinsic::x86_avx10_vfmaddpd256;
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
+ IID = llvm::Intrinsic::x86_avx10_vfmaddsubps256;
+ break;
+ case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
+ Subtract = true;
+ LLVM_FALLTHROUGH;
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
+ IID = llvm::Intrinsic::x86_avx10_vfmaddsubpd256;
+ break;
}
Value *A = Ops[0];
@@ -13977,6 +14025,12 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
case clang::X86::BI__builtin_ia32_vfmaddsubph512_mask:
case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask:
MaskFalseVal = Ops[0];
break;
case clang::X86::BI__builtin_ia32_vfmaddph512_maskz:
@@ -13985,6 +14039,12 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
case clang::X86::BI__builtin_ia32_vfmaddsubph512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddph256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddps256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_maskz:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_maskz:
MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
break;
case clang::X86::BI__builtin_ia32_vfmsubph512_mask3:
@@ -13999,6 +14059,18 @@ static Value *EmitX86FMAExpr(CodeGenFunction &CGF, const CallExpr *E,
case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubph256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddph256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubps256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddps256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddph256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubph256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddps256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubps256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmsubaddpd256_round_mask3:
+ case clang::X86::BI__builtin_ia32_vfmaddsubpd256_round_mask3:
MaskFalseVal = Ops[2];
break;
}
@@ -14686,6 +14758,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vcvtw2ph512_mask:
case X86::BI__builtin_ia32_vcvtdq2ph512_mask:
case X86::BI__builtin_ia32_vcvtqq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ph256_round_mask:
+ case X86::BI__builtin_ia32_vcvtdq2ps256_round_mask:
+ case X86::BI__builtin_ia32_vcvtqq2pd256_round_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ph256_round_mask:
+ case X86::BI__builtin_ia32_vcvtqq2ps256_round_mask:
+ case X86::BI__builtin_ia32_vcvtw2ph256_round_mask:
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ true);
case X86::BI__builtin_ia32_cvtudq2ps512_mask:
case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
@@ -14693,6 +14771,12 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vcvtuw2ph512_mask:
case X86::BI__builtin_ia32_vcvtudq2ph512_mask:
case X86::BI__builtin_ia32_vcvtuqq2ph512_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ph256_round_mask:
+ case X86::BI__builtin_ia32_vcvtudq2ps256_round_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2pd256_round_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ph256_round_mask:
+ case X86::BI__builtin_ia32_vcvtuqq2ps256_round_mask:
+ case X86::BI__builtin_ia32_vcvtuw2ph256_round_mask:
return EmitX86ConvertIntToFp(*this, E, Ops, /*IsSigned*/ false);
case X86::BI__builtin_ia32_vfmaddss3:
@@ -14736,6 +14820,18 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
case X86::BI__builtin_ia32_vfmaddpd512_mask3:
case X86::BI__builtin_ia32_vfmsubpd512_mask3:
case X86::BI__builtin_ia32_vfmsubph512_mask3:
+ case X86::BI__builtin_ia32_vfmaddph256_round_mask:
+ case X86::BI__builtin_ia32_vfmaddph256_round_maskz:
+ case X86::BI__builtin_ia32_vfmaddph256_round_mask3:
+ case X86::BI__builtin_ia32_vfmaddps256_round_mask:
+ case X86::BI__builtin_ia32_vfmaddps256_round_maskz:
+ case X86::BI__builtin_ia32_vfmaddps256_round_mask3:
+ case X86::BI__builtin_ia32_vfmsubps256_round_mask3:
+ case X86::BI__builtin_ia32_vfmaddpd256_round_mask:
+ case X86::BI__builtin_ia32_vfmaddpd256_round_maskz:
+ case X86::BI__builtin_ia32_vfmaddpd256_round_mask3:
+ case X86::BI__builtin_ia32_vfmsubpd256_round_mask3:
+ case X86::BI__builtin_ia32_vfmsubph256_round_mask3:
return EmitX86FMAExpr(*this, E, Ops, BuiltinID, /*IsAddSub*/ false);
case X86::BI__builtin_ia32_vfmaddsubph512_...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/101825
More information about the llvm-commits
mailing list