[llvm] 0508fb4 - [CodeGen][BFloat] Add bfloat MVT type
Ties Stuij via llvm-commits
llvm-commits at lists.llvm.org
Wed May 27 05:38:32 PDT 2020
Author: Ties Stuij
Date: 2020-05-27T13:38:12+01:00
New Revision: 0508fb45dfbc3ffde6bacc1e52177f3972a3eb99
URL: https://github.com/llvm/llvm-project/commit/0508fb45dfbc3ffde6bacc1e52177f3972a3eb99
DIFF: https://github.com/llvm/llvm-project/commit/0508fb45dfbc3ffde6bacc1e52177f3972a3eb99.diff
LOG: [CodeGen][BFloat] Add bfloat MVT type
Summary:
This patch adds BFloat MVT support. It also adds fixed and scalable vector MVT
types for BFloat.
This patch is part of a series that adds support for the Bfloat16 extension of the Armv8.6-a architecture, as
detailed here:
https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a
The bfloat type and its properties are specified in the Arm Architecture
Reference Manual:
https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile
Reviewers: aemerson, huntergr, craig.topper, fpetrogalli, sdesmalen, LukeGeeson, ostannard
Reviewed By: ostannard
Subscribers: LukeGeeson, pbarrio, dschuff, kristof.beyls, hiraditya, aheejin, jdoerfert, llvm-commits
Tags: #llvm
Differential Revision: https://reviews.llvm.org/D79706
Added:
Modified:
llvm/include/llvm/CodeGen/SelectionDAG.h
llvm/include/llvm/CodeGen/ValueTypes.td
llvm/include/llvm/IR/Intrinsics.td
llvm/include/llvm/Support/MachineValueType.h
llvm/lib/CodeGen/ValueTypes.cpp
llvm/utils/TableGen/CodeGenTarget.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 8b794d98d81f..462d9f91c4f1 100644
--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1547,6 +1547,7 @@ class SelectionDAG {
switch (VT.getScalarType().getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unknown FP format");
case MVT::f16: return APFloat::IEEEhalf();
+ case MVT::bf16: return APFloat::BFloat();
case MVT::f32: return APFloat::IEEEsingle();
case MVT::f64: return APFloat::IEEEdouble();
case MVT::f80: return APFloat::x87DoubleExtended();
diff --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 2ec0ed7ce3bd..e08a33a50df6 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -25,148 +25,159 @@ def i16 : ValueType<16 , 4>; // 16-bit integer value
def i32 : ValueType<32 , 5>; // 32-bit integer value
def i64 : ValueType<64 , 6>; // 64-bit integer value
def i128 : ValueType<128, 7>; // 128-bit integer value
-def f16 : ValueType<16 , 8>; // 16-bit floating point value
-def f32 : ValueType<32 , 9>; // 32-bit floating point value
-def f64 : ValueType<64 , 10>; // 64-bit floating point value
-def f80 : ValueType<80 , 11>; // 80-bit floating point value
-def f128 : ValueType<128, 12>; // 128-bit floating point value
-def ppcf128: ValueType<128, 13>; // PPC 128-bit floating point value
-
-def v1i1 : ValueType<1 , 14>; // 1 x i1 vector value
-def v2i1 : ValueType<2 , 15>; // 2 x i1 vector value
-def v4i1 : ValueType<4 , 16>; // 4 x i1 vector value
-def v8i1 : ValueType<8 , 17>; // 8 x i1 vector value
-def v16i1 : ValueType<16, 18>; // 16 x i1 vector value
-def v32i1 : ValueType<32 , 19>; // 32 x i1 vector value
-def v64i1 : ValueType<64 , 20>; // 64 x i1 vector value
-def v128i1 : ValueType<128, 21>; // 128 x i1 vector value
-def v256i1 : ValueType<256, 22>; // 256 x i1 vector value
-def v512i1 : ValueType<512, 23>; // 512 x i1 vector value
-def v1024i1: ValueType<1024,24>; //1024 x i1 vector value
-
-def v1i8 : ValueType<8, 25>; // 1 x i8 vector value
-def v2i8 : ValueType<16 , 26>; // 2 x i8 vector value
-def v4i8 : ValueType<32 , 27>; // 4 x i8 vector value
-def v8i8 : ValueType<64 , 28>; // 8 x i8 vector value
-def v16i8 : ValueType<128, 29>; // 16 x i8 vector value
-def v32i8 : ValueType<256, 30>; // 32 x i8 vector value
-def v64i8 : ValueType<512, 31>; // 64 x i8 vector value
-def v128i8 : ValueType<1024,32>; //128 x i8 vector value
-def v256i8 : ValueType<2048,33>; //256 x i8 vector value
-
-def v1i16 : ValueType<16 , 34>; // 1 x i16 vector value
-def v2i16 : ValueType<32 , 35>; // 2 x i16 vector value
-def v3i16 : ValueType<48 , 36>; // 3 x i16 vector value
-def v4i16 : ValueType<64 , 37>; // 4 x i16 vector value
-def v8i16 : ValueType<128, 38>; // 8 x i16 vector value
-def v16i16 : ValueType<256, 39>; // 16 x i16 vector value
-def v32i16 : ValueType<512, 40>; // 32 x i16 vector value
-def v64i16 : ValueType<1024,41>; // 64 x i16 vector value
-def v128i16: ValueType<2048,42>; //128 x i16 vector value
-
-def v1i32 : ValueType<32 , 43>; // 1 x i32 vector value
-def v2i32 : ValueType<64 , 44>; // 2 x i32 vector value
-def v3i32 : ValueType<96 , 45>; // 3 x i32 vector value
-def v4i32 : ValueType<128, 46>; // 4 x i32 vector value
-def v5i32 : ValueType<160, 47>; // 5 x i32 vector value
-def v8i32 : ValueType<256, 48>; // 8 x i32 vector value
-def v16i32 : ValueType<512, 49>; // 16 x i32 vector value
-def v32i32 : ValueType<1024,50>; // 32 x i32 vector value
-def v64i32 : ValueType<2048,51>; // 64 x i32 vector value
-def v128i32 : ValueType<4096,52>; // 128 x i32 vector value
-def v256i32 : ValueType<8182,53>; // 256 x i32 vector value
-def v512i32 : ValueType<16384,54>; // 512 x i32 vector value
-def v1024i32 : ValueType<32768,55>; // 1024 x i32 vector value
-def v2048i32 : ValueType<65536,56>; // 2048 x i32 vector value
-
-def v1i64 : ValueType<64 , 57>; // 1 x i64 vector value
-def v2i64 : ValueType<128, 58>; // 2 x i64 vector value
-def v4i64 : ValueType<256, 59>; // 4 x i64 vector value
-def v8i64 : ValueType<512, 60>; // 8 x i64 vector value
-def v16i64 : ValueType<1024,61>; // 16 x i64 vector value
-def v32i64 : ValueType<2048,62>; // 32 x i64 vector value
-
-def v1i128 : ValueType<128, 63>; // 1 x i128 vector value
-
-def v2f16 : ValueType<32 , 64>; // 2 x f16 vector value
-def v3f16 : ValueType<48 , 65>; // 3 x f16 vector value
-def v4f16 : ValueType<64 , 66>; // 4 x f16 vector value
-def v8f16 : ValueType<128, 67>; // 8 x f16 vector value
-def v16f16 : ValueType<256, 68>; // 8 x f16 vector value
-def v32f16 : ValueType<512, 69>; // 8 x f16 vector value
-def v1f32 : ValueType<32 , 70>; // 1 x f32 vector value
-def v2f32 : ValueType<64 , 71>; // 2 x f32 vector value
-def v3f32 : ValueType<96 , 72>; // 3 x f32 vector value
-def v4f32 : ValueType<128, 73>; // 4 x f32 vector value
-def v5f32 : ValueType<160, 74>; // 5 x f32 vector value
-def v8f32 : ValueType<256, 75>; // 8 x f32 vector value
-def v16f32 : ValueType<512, 76>; // 16 x f32 vector value
-def v32f32 : ValueType<1024, 77>; // 32 x f32 vector value
-def v64f32 : ValueType<2048, 78>; // 64 x f32 vector value
-def v128f32 : ValueType<4096, 79>; // 128 x f32 vector value
-def v256f32 : ValueType<8182, 80>; // 256 x f32 vector value
-def v512f32 : ValueType<16384, 81>; // 512 x f32 vector value
-def v1024f32 : ValueType<32768, 82>; // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 83>; // 2048 x f32 vector value
-def v1f64 : ValueType<64, 84>; // 1 x f64 vector value
-def v2f64 : ValueType<128, 85>; // 2 x f64 vector value
-def v4f64 : ValueType<256, 86>; // 4 x f64 vector value
-def v8f64 : ValueType<512, 87>; // 8 x f64 vector value
-def v16f64 : ValueType<1024, 88>; // 16 x f64 vector value
-
-def nxv1i1 : ValueType<1, 89>; // n x 1 x i1 vector value
-def nxv2i1 : ValueType<2, 90>; // n x 2 x i1 vector value
-def nxv4i1 : ValueType<4, 91>; // n x 4 x i1 vector value
-def nxv8i1 : ValueType<8, 92>; // n x 8 x i1 vector value
-def nxv16i1 : ValueType<16, 93>; // n x 16 x i1 vector value
-def nxv32i1 : ValueType<32, 94>; // n x 32 x i1 vector value
-
-def nxv1i8 : ValueType<8, 95>; // n x 1 x i8 vector value
-def nxv2i8 : ValueType<16, 96>; // n x 2 x i8 vector value
-def nxv4i8 : ValueType<32, 97>; // n x 4 x i8 vector value
-def nxv8i8 : ValueType<64, 98>; // n x 8 x i8 vector value
-def nxv16i8 : ValueType<128, 99>; // n x 16 x i8 vector value
-def nxv32i8 : ValueType<256, 100>; // n x 32 x i8 vector value
-
-def nxv1i16 : ValueType<16, 101>; // n x 1 x i16 vector value
-def nxv2i16 : ValueType<32, 102>; // n x 2 x i16 vector value
-def nxv4i16 : ValueType<64, 103>; // n x 4 x i16 vector value
-def nxv8i16 : ValueType<128, 104>; // n x 8 x i16 vector value
-def nxv16i16: ValueType<256, 105>; // n x 16 x i16 vector value
-def nxv32i16: ValueType<512, 106>; // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32, 107>; // n x 1 x i32 vector value
-def nxv2i32 : ValueType<64, 108>; // n x 2 x i32 vector value
-def nxv4i32 : ValueType<128, 109>; // n x 4 x i32 vector value
-def nxv8i32 : ValueType<256, 110>; // n x 8 x i32 vector value
-def nxv16i32: ValueType<512, 111>; // n x 16 x i32 vector value
-def nxv32i32: ValueType<1024,112>; // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64, 113>; // n x 1 x i64 vector value
-def nxv2i64 : ValueType<128, 114>; // n x 2 x i64 vector value
-def nxv4i64 : ValueType<256, 115>; // n x 4 x i64 vector value
-def nxv8i64 : ValueType<512, 116>; // n x 8 x i64 vector value
-def nxv16i64: ValueType<1024,117>; // n x 16 x i64 vector value
-def nxv32i64: ValueType<2048,118>; // n x 32 x i64 vector value
-
-def nxv2f16 : ValueType<32 , 119>; // n x 2 x f16 vector value
-def nxv4f16 : ValueType<64 , 120>; // n x 4 x f16 vector value
-def nxv8f16 : ValueType<128, 121>; // n x 8 x f16 vector value
-def nxv1f32 : ValueType<32 , 122>; // n x 1 x f32 vector value
-def nxv2f32 : ValueType<64 , 123>; // n x 2 x f32 vector value
-def nxv4f32 : ValueType<128, 124>; // n x 4 x f32 vector value
-def nxv8f32 : ValueType<256, 125>; // n x 8 x f32 vector value
-def nxv16f32 : ValueType<512, 126>; // n x 16 x f32 vector value
-def nxv1f64 : ValueType<64, 127>; // n x 1 x f64 vector value
-def nxv2f64 : ValueType<128, 128>; // n x 2 x f64 vector value
-def nxv4f64 : ValueType<256, 129>; // n x 4 x f64 vector value
-def nxv8f64 : ValueType<512, 130>; // n x 8 x f64 vector value
-
-def x86mmx : ValueType<64 , 131>; // X86 MMX value
-def FlagVT : ValueType<0 , 132>; // Pre-RA sched glue
-def isVoid : ValueType<0 , 133>; // Produces no value
-def untyped: ValueType<8 , 134>; // Produces an untyped value
-def exnref : ValueType<0 , 135>; // WebAssembly's exnref type
+
+def bf16 : ValueType<16 , 8>; // 16-bit brain floating point value
+def f16 : ValueType<16 , 9>; // 16-bit floating point value
+def f32 : ValueType<32 , 10>; // 32-bit floating point value
+def f64 : ValueType<64 , 11>; // 64-bit floating point value
+def f80 : ValueType<80 , 12>; // 80-bit floating point value
+def f128 : ValueType<128, 13>; // 128-bit floating point value
+def ppcf128: ValueType<128, 14>; // PPC 128-bit floating point value
+
+def v1i1 : ValueType<1 , 15>; // 1 x i1 vector value
+def v2i1 : ValueType<2 , 16>; // 2 x i1 vector value
+def v4i1 : ValueType<4 , 17>; // 4 x i1 vector value
+def v8i1 : ValueType<8 , 18>; // 8 x i1 vector value
+def v16i1 : ValueType<16, 19>; // 16 x i1 vector value
+def v32i1 : ValueType<32 , 20>; // 32 x i1 vector value
+def v64i1 : ValueType<64 , 21>; // 64 x i1 vector value
+def v128i1 : ValueType<128, 22>; // 128 x i1 vector value
+def v256i1 : ValueType<256, 23>; // 256 x i1 vector value
+def v512i1 : ValueType<512, 24>; // 512 x i1 vector value
+def v1024i1: ValueType<1024,25>; //1024 x i1 vector value
+
+def v1i8 : ValueType<8, 26>; // 1 x i8 vector value
+def v2i8 : ValueType<16 , 27>; // 2 x i8 vector value
+def v4i8 : ValueType<32 , 28>; // 4 x i8 vector value
+def v8i8 : ValueType<64 , 29>; // 8 x i8 vector value
+def v16i8 : ValueType<128, 30>; // 16 x i8 vector value
+def v32i8 : ValueType<256, 31>; // 32 x i8 vector value
+def v64i8 : ValueType<512, 32>; // 64 x i8 vector value
+def v128i8 : ValueType<1024,33>; //128 x i8 vector value
+def v256i8 : ValueType<2048,34>; //256 x i8 vector value
+
+def v1i16 : ValueType<16 , 35>; // 1 x i16 vector value
+def v2i16 : ValueType<32 , 36>; // 2 x i16 vector value
+def v3i16 : ValueType<48 , 37>; // 3 x i16 vector value
+def v4i16 : ValueType<64 , 38>; // 4 x i16 vector value
+def v8i16 : ValueType<128, 39>; // 8 x i16 vector value
+def v16i16 : ValueType<256, 40>; // 16 x i16 vector value
+def v32i16 : ValueType<512, 41>; // 32 x i16 vector value
+def v64i16 : ValueType<1024,42>; // 64 x i16 vector value
+def v128i16: ValueType<2048,43>; //128 x i16 vector value
+
+def v1i32 : ValueType<32 , 44>; // 1 x i32 vector value
+def v2i32 : ValueType<64 , 45>; // 2 x i32 vector value
+def v3i32 : ValueType<96 , 46>; // 3 x i32 vector value
+def v4i32 : ValueType<128, 47>; // 4 x i32 vector value
+def v5i32 : ValueType<160, 48>; // 5 x i32 vector value
+def v8i32 : ValueType<256, 49>; // 8 x i32 vector value
+def v16i32 : ValueType<512, 50>; // 16 x i32 vector value
+def v32i32 : ValueType<1024,51>; // 32 x i32 vector value
+def v64i32 : ValueType<2048,52>; // 64 x i32 vector value
+def v128i32 : ValueType<4096,53>; // 128 x i32 vector value
+def v256i32 : ValueType<8192,54>; // 256 x i32 vector value
+def v512i32 : ValueType<16384,55>; // 512 x i32 vector value
+def v1024i32 : ValueType<32768,56>; // 1024 x i32 vector value
+def v2048i32 : ValueType<65536,57>; // 2048 x i32 vector value
+
+def v1i64 : ValueType<64 , 58>; // 1 x i64 vector value
+def v2i64 : ValueType<128, 59>; // 2 x i64 vector value
+def v4i64 : ValueType<256, 60>; // 4 x i64 vector value
+def v8i64 : ValueType<512, 61>; // 8 x i64 vector value
+def v16i64 : ValueType<1024,62>; // 16 x i64 vector value
+def v32i64 : ValueType<2048,63>; // 32 x i64 vector value
+
+def v1i128 : ValueType<128, 64>; // 1 x i128 vector value
+
+def v2f16 : ValueType<32 , 65>; // 2 x f16 vector value
+def v3f16 : ValueType<48 , 66>; // 3 x f16 vector value
+def v4f16 : ValueType<64 , 67>; // 4 x f16 vector value
+def v8f16 : ValueType<128, 68>; // 8 x f16 vector value
+def v16f16 : ValueType<256, 69>; // 16 x f16 vector value
+def v32f16 : ValueType<512, 70>; // 32 x f16 vector value
+def v2bf16 : ValueType<32 , 71>; // 2 x bf16 vector value
+def v3bf16 : ValueType<48 , 72>; // 3 x bf16 vector value
+def v4bf16 : ValueType<64 , 73>; // 4 x bf16 vector value
+def v8bf16 : ValueType<128, 74>; // 8 x bf16 vector value
+def v16bf16 : ValueType<256, 75>; // 16 x bf16 vector value
+def v32bf16 : ValueType<512, 76>; // 32 x bf16 vector value
+def v1f32 : ValueType<32 , 77>; // 1 x f32 vector value
+def v2f32 : ValueType<64 , 78>; // 2 x f32 vector value
+def v3f32 : ValueType<96 , 79>; // 3 x f32 vector value
+def v4f32 : ValueType<128, 80>; // 4 x f32 vector value
+def v5f32 : ValueType<160, 81>; // 5 x f32 vector value
+def v8f32 : ValueType<256, 82>; // 8 x f32 vector value
+def v16f32 : ValueType<512, 83>; // 16 x f32 vector value
+def v32f32 : ValueType<1024, 84>; // 32 x f32 vector value
+def v64f32 : ValueType<2048, 85>; // 64 x f32 vector value
+def v128f32 : ValueType<4096, 86>; // 128 x f32 vector value
+def v256f32 : ValueType<8192, 87>; // 256 x f32 vector value
+def v512f32 : ValueType<16384, 88>; // 512 x f32 vector value
+def v1024f32 : ValueType<32768, 89>; // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 90>; // 2048 x f32 vector value
+def v1f64 : ValueType<64, 91>; // 1 x f64 vector value
+def v2f64 : ValueType<128, 92>; // 2 x f64 vector value
+def v4f64 : ValueType<256, 93>; // 4 x f64 vector value
+def v8f64 : ValueType<512, 94>; // 8 x f64 vector value
+def v16f64 : ValueType<1024, 95>; // 16 x f64 vector value
+
+def nxv1i1 : ValueType<1, 96>; // n x 1 x i1 vector value
+def nxv2i1 : ValueType<2, 97>; // n x 2 x i1 vector value
+def nxv4i1 : ValueType<4, 98>; // n x 4 x i1 vector value
+def nxv8i1 : ValueType<8, 99>; // n x 8 x i1 vector value
+def nxv16i1 : ValueType<16, 100>; // n x 16 x i1 vector value
+def nxv32i1 : ValueType<32, 101>; // n x 32 x i1 vector value
+
+def nxv1i8 : ValueType<8, 102>; // n x 1 x i8 vector value
+def nxv2i8 : ValueType<16, 103>; // n x 2 x i8 vector value
+def nxv4i8 : ValueType<32, 104>; // n x 4 x i8 vector value
+def nxv8i8 : ValueType<64, 105>; // n x 8 x i8 vector value
+def nxv16i8 : ValueType<128, 106>; // n x 16 x i8 vector value
+def nxv32i8 : ValueType<256, 107>; // n x 32 x i8 vector value
+
+def nxv1i16 : ValueType<16, 108>; // n x 1 x i16 vector value
+def nxv2i16 : ValueType<32, 109>; // n x 2 x i16 vector value
+def nxv4i16 : ValueType<64, 110>; // n x 4 x i16 vector value
+def nxv8i16 : ValueType<128, 111>; // n x 8 x i16 vector value
+def nxv16i16: ValueType<256, 112>; // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 113>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32, 114>; // n x 1 x i32 vector value
+def nxv2i32 : ValueType<64, 115>; // n x 2 x i32 vector value
+def nxv4i32 : ValueType<128, 116>; // n x 4 x i32 vector value
+def nxv8i32 : ValueType<256, 117>; // n x 8 x i32 vector value
+def nxv16i32: ValueType<512, 118>; // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,119>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64, 120>; // n x 1 x i64 vector value
+def nxv2i64 : ValueType<128, 121>; // n x 2 x i64 vector value
+def nxv4i64 : ValueType<256, 122>; // n x 4 x i64 vector value
+def nxv8i64 : ValueType<512, 123>; // n x 8 x i64 vector value
+def nxv16i64: ValueType<1024,124>; // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,125>; // n x 32 x i64 vector value
+
+def nxv2f16 : ValueType<32 , 126>; // n x 2 x f16 vector value
+def nxv4f16 : ValueType<64 , 127>; // n x 4 x f16 vector value
+def nxv8f16 : ValueType<128, 128>; // n x 8 x f16 vector value
+def nxv2bf16 : ValueType<32 , 129>; // n x 2 x bf16 vector value
+def nxv4bf16 : ValueType<64 , 130>; // n x 4 x bf16 vector value
+def nxv8bf16 : ValueType<128, 131>; // n x 8 x bf16 vector value
+def nxv1f32 : ValueType<32 , 132>; // n x 1 x f32 vector value
+def nxv2f32 : ValueType<64 , 133>; // n x 2 x f32 vector value
+def nxv4f32 : ValueType<128, 134>; // n x 4 x f32 vector value
+def nxv8f32 : ValueType<256, 135>; // n x 8 x f32 vector value
+def nxv16f32 : ValueType<512, 136>; // n x 16 x f32 vector value
+def nxv1f64 : ValueType<64, 137>; // n x 1 x f64 vector value
+def nxv2f64 : ValueType<128, 138>; // n x 2 x f64 vector value
+def nxv4f64 : ValueType<256, 139>; // n x 4 x f64 vector value
+def nxv8f64 : ValueType<512, 140>; // n x 8 x f64 vector value
+
+def x86mmx : ValueType<64 , 141>; // X86 MMX value
+def FlagVT : ValueType<0 , 142>; // Pre-RA sched glue
+def isVoid : ValueType<0 , 143>; // Produces no value
+def untyped: ValueType<8 , 144>; // Produces an untyped value
+def exnref : ValueType<0 , 145>; // WebAssembly's exnref type
def token : ValueType<0 , 248>; // TokenTy
def MetadataVT: ValueType<0, 249>; // Metadata
diff --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 7bfb25b0ed7d..33961767e1c0 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -214,6 +214,7 @@ def llvm_i16_ty : LLVMType<i16>;
def llvm_i32_ty : LLVMType<i32>;
def llvm_i64_ty : LLVMType<i64>;
def llvm_half_ty : LLVMType<f16>;
+def llvm_bfloat_ty : LLVMType<bf16>;
def llvm_float_ty : LLVMType<f32>;
def llvm_double_ty : LLVMType<f64>;
def llvm_f80_ty : LLVMType<f80>;
diff --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index 224353c5047f..93683eb7dcf7 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -47,100 +47,107 @@ namespace llvm {
FIRST_INTEGER_VALUETYPE = i1,
LAST_INTEGER_VALUETYPE = i128,
- f16 = 8, // This is a 16 bit floating point value
- f32 = 9, // This is a 32 bit floating point value
- f64 = 10, // This is a 64 bit floating point value
- f80 = 11, // This is a 80 bit floating point value
- f128 = 12, // This is a 128 bit floating point value
- ppcf128 = 13, // This is a PPC 128-bit floating point value
-
- FIRST_FP_VALUETYPE = f16,
+ bf16 = 8, // This is a 16 bit brain floating point value
+ f16 = 9, // This is a 16 bit floating point value
+ f32 = 10, // This is a 32 bit floating point value
+ f64 = 11, // This is a 64 bit floating point value
+ f80 = 12, // This is a 80 bit floating point value
+ f128 = 13, // This is a 128 bit floating point value
+ ppcf128 = 14, // This is a PPC 128-bit floating point value
+
+ FIRST_FP_VALUETYPE = bf16,
LAST_FP_VALUETYPE = ppcf128,
- v1i1 = 14, // 1 x i1
- v2i1 = 15, // 2 x i1
- v4i1 = 16, // 4 x i1
- v8i1 = 17, // 8 x i1
- v16i1 = 18, // 16 x i1
- v32i1 = 19, // 32 x i1
- v64i1 = 20, // 64 x i1
- v128i1 = 21, // 128 x i1
- v256i1 = 22, // 256 x i1
- v512i1 = 23, // 512 x i1
- v1024i1 = 24, // 1024 x i1
-
- v1i8 = 25, // 1 x i8
- v2i8 = 26, // 2 x i8
- v4i8 = 27, // 4 x i8
- v8i8 = 28, // 8 x i8
- v16i8 = 29, // 16 x i8
- v32i8 = 30, // 32 x i8
- v64i8 = 31, // 64 x i8
- v128i8 = 32, //128 x i8
- v256i8 = 33, //256 x i8
-
- v1i16 = 34, // 1 x i16
- v2i16 = 35, // 2 x i16
- v3i16 = 36, // 3 x i16
- v4i16 = 37, // 4 x i16
- v8i16 = 38, // 8 x i16
- v16i16 = 39, // 16 x i16
- v32i16 = 40, // 32 x i16
- v64i16 = 41, // 64 x i16
- v128i16 = 42, //128 x i16
-
- v1i32 = 43, // 1 x i32
- v2i32 = 44, // 2 x i32
- v3i32 = 45, // 3 x i32
- v4i32 = 46, // 4 x i32
- v5i32 = 47, // 5 x i32
- v8i32 = 48, // 8 x i32
- v16i32 = 49, // 16 x i32
- v32i32 = 50, // 32 x i32
- v64i32 = 51, // 64 x i32
- v128i32 = 52, // 128 x i32
- v256i32 = 53, // 256 x i32
- v512i32 = 54, // 512 x i32
- v1024i32 = 55, // 1024 x i32
- v2048i32 = 56, // 2048 x i32
-
- v1i64 = 57, // 1 x i64
- v2i64 = 58, // 2 x i64
- v4i64 = 59, // 4 x i64
- v8i64 = 60, // 8 x i64
- v16i64 = 61, // 16 x i64
- v32i64 = 62, // 32 x i64
-
- v1i128 = 63, // 1 x i128
+ v1i1 = 15, // 1 x i1
+ v2i1 = 16, // 2 x i1
+ v4i1 = 17, // 4 x i1
+ v8i1 = 18, // 8 x i1
+ v16i1 = 19, // 16 x i1
+ v32i1 = 20, // 32 x i1
+ v64i1 = 21, // 64 x i1
+ v128i1 = 22, // 128 x i1
+ v256i1 = 23, // 256 x i1
+ v512i1 = 24, // 512 x i1
+ v1024i1 = 25, // 1024 x i1
+
+ v1i8 = 26, // 1 x i8
+ v2i8 = 27, // 2 x i8
+ v4i8 = 28, // 4 x i8
+ v8i8 = 29, // 8 x i8
+ v16i8 = 30, // 16 x i8
+ v32i8 = 31, // 32 x i8
+ v64i8 = 32, // 64 x i8
+ v128i8 = 33, //128 x i8
+ v256i8 = 34, //256 x i8
+
+ v1i16 = 35, // 1 x i16
+ v2i16 = 36, // 2 x i16
+ v3i16 = 37, // 3 x i16
+ v4i16 = 38, // 4 x i16
+ v8i16 = 39, // 8 x i16
+ v16i16 = 40, // 16 x i16
+ v32i16 = 41, // 32 x i16
+ v64i16 = 42, // 64 x i16
+ v128i16 = 43, //128 x i16
+
+ v1i32 = 44, // 1 x i32
+ v2i32 = 45, // 2 x i32
+ v3i32 = 46, // 3 x i32
+ v4i32 = 47, // 4 x i32
+ v5i32 = 48, // 5 x i32
+ v8i32 = 49, // 8 x i32
+ v16i32 = 50, // 16 x i32
+ v32i32 = 51, // 32 x i32
+ v64i32 = 52, // 64 x i32
+ v128i32 = 53, // 128 x i32
+ v256i32 = 54, // 256 x i32
+ v512i32 = 55, // 512 x i32
+ v1024i32 = 56, // 1024 x i32
+ v2048i32 = 57, // 2048 x i32
+
+ v1i64 = 58, // 1 x i64
+ v2i64 = 59, // 2 x i64
+ v4i64 = 60, // 4 x i64
+ v8i64 = 61, // 8 x i64
+ v16i64 = 62, // 16 x i64
+ v32i64 = 63, // 32 x i64
+
+ v1i128 = 64, // 1 x i128
FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
- v2f16 = 64, // 2 x f16
- v3f16 = 65, // 3 x f16
- v4f16 = 66, // 4 x f16
- v8f16 = 67, // 8 x f16
- v16f16 = 68, // 16 x f16
- v32f16 = 69, // 32 x f16
- v1f32 = 70, // 1 x f32
- v2f32 = 71, // 2 x f32
- v3f32 = 72, // 3 x f32
- v4f32 = 73, // 4 x f32
- v5f32 = 74, // 5 x f32
- v8f32 = 75, // 8 x f32
- v16f32 = 76, // 16 x f32
- v32f32 = 77, // 32 x f32
- v64f32 = 78, // 64 x f32
- v128f32 = 79, // 128 x f32
- v256f32 = 80, // 256 x f32
- v512f32 = 81, // 512 x f32
- v1024f32 = 82, // 1024 x f32
- v2048f32 = 83, // 2048 x f32
- v1f64 = 84, // 1 x f64
- v2f64 = 85, // 2 x f64
- v4f64 = 86, // 4 x f64
- v8f64 = 87, // 8 x f64
- v16f64 = 88, // 16 x f64
+ v2f16 = 65, // 2 x f16
+ v3f16 = 66, // 3 x f16
+ v4f16 = 67, // 4 x f16
+ v8f16 = 68, // 8 x f16
+ v16f16 = 69, // 16 x f16
+ v32f16 = 70, // 32 x f16
+ v2bf16 = 71, // 2 x bf16
+ v3bf16 = 72, // 3 x bf16
+ v4bf16 = 73, // 4 x bf16
+ v8bf16 = 74, // 8 x bf16
+ v16bf16 = 75, // 16 x bf16
+ v32bf16 = 76, // 32 x bf16
+ v1f32 = 77, // 1 x f32
+ v2f32 = 78, // 2 x f32
+ v3f32 = 79, // 3 x f32
+ v4f32 = 80, // 4 x f32
+ v5f32 = 81, // 5 x f32
+ v8f32 = 82, // 8 x f32
+ v16f32 = 83, // 16 x f32
+ v32f32 = 84, // 32 x f32
+ v64f32 = 85, // 64 x f32
+ v128f32 = 86, // 128 x f32
+ v256f32 = 87, // 256 x f32
+ v512f32 = 88, // 512 x f32
+ v1024f32 = 89, // 1024 x f32
+ v2048f32 = 90, // 2048 x f32
+ v1f64 = 91, // 1 x f64
+ v2f64 = 92, // 2 x f64
+ v4f64 = 93, // 4 x f64
+ v8f64 = 94, // 8 x f64
+ v16f64 = 95, // 16 x f64
FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16,
LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v16f64,
@@ -148,56 +155,59 @@ namespace llvm {
FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
LAST_FIXEDLEN_VECTOR_VALUETYPE = v16f64,
- nxv1i1 = 89, // n x 1 x i1
- nxv2i1 = 90, // n x 2 x i1
- nxv4i1 = 91, // n x 4 x i1
- nxv8i1 = 92, // n x 8 x i1
- nxv16i1 = 93, // n x 16 x i1
- nxv32i1 = 94, // n x 32 x i1
-
- nxv1i8 = 95, // n x 1 x i8
- nxv2i8 = 96, // n x 2 x i8
- nxv4i8 = 97, // n x 4 x i8
- nxv8i8 = 98, // n x 8 x i8
- nxv16i8 = 99, // n x 16 x i8
- nxv32i8 = 100, // n x 32 x i8
-
- nxv1i16 = 101, // n x 1 x i16
- nxv2i16 = 102, // n x 2 x i16
- nxv4i16 = 103, // n x 4 x i16
- nxv8i16 = 104, // n x 8 x i16
- nxv16i16 = 105, // n x 16 x i16
- nxv32i16 = 106, // n x 32 x i16
-
- nxv1i32 = 107, // n x 1 x i32
- nxv2i32 = 108, // n x 2 x i32
- nxv4i32 = 109, // n x 4 x i32
- nxv8i32 = 110, // n x 8 x i32
- nxv16i32 = 111, // n x 16 x i32
- nxv32i32 = 112, // n x 32 x i32
-
- nxv1i64 = 113, // n x 1 x i64
- nxv2i64 = 114, // n x 2 x i64
- nxv4i64 = 115, // n x 4 x i64
- nxv8i64 = 116, // n x 8 x i64
- nxv16i64 = 117, // n x 16 x i64
- nxv32i64 = 118, // n x 32 x i64
+ nxv1i1 = 96, // n x 1 x i1
+ nxv2i1 = 97, // n x 2 x i1
+ nxv4i1 = 98, // n x 4 x i1
+ nxv8i1 = 99, // n x 8 x i1
+ nxv16i1 = 100, // n x 16 x i1
+ nxv32i1 = 101, // n x 32 x i1
+
+ nxv1i8 = 102, // n x 1 x i8
+ nxv2i8 = 103, // n x 2 x i8
+ nxv4i8 = 104, // n x 4 x i8
+ nxv8i8 = 105, // n x 8 x i8
+ nxv16i8 = 106, // n x 16 x i8
+ nxv32i8 = 107, // n x 32 x i8
+
+ nxv1i16 = 108, // n x 1 x i16
+ nxv2i16 = 109, // n x 2 x i16
+ nxv4i16 = 110, // n x 4 x i16
+ nxv8i16 = 111, // n x 8 x i16
+ nxv16i16 = 112, // n x 16 x i16
+ nxv32i16 = 113, // n x 32 x i16
+
+ nxv1i32 = 114, // n x 1 x i32
+ nxv2i32 = 115, // n x 2 x i32
+ nxv4i32 = 116, // n x 4 x i32
+ nxv8i32 = 117, // n x 8 x i32
+ nxv16i32 = 118, // n x 16 x i32
+ nxv32i32 = 119, // n x 32 x i32
+
+ nxv1i64 = 120, // n x 1 x i64
+ nxv2i64 = 121, // n x 2 x i64
+ nxv4i64 = 122, // n x 4 x i64
+ nxv8i64 = 123, // n x 8 x i64
+ nxv16i64 = 124, // n x 16 x i64
+ nxv32i64 = 125, // n x 32 x i64
FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
- nxv2f16 = 119, // n x 2 x f16
- nxv4f16 = 120, // n x 4 x f16
- nxv8f16 = 121, // n x 8 x f16
- nxv1f32 = 122, // n x 1 x f32
- nxv2f32 = 123, // n x 2 x f32
- nxv4f32 = 124, // n x 4 x f32
- nxv8f32 = 125, // n x 8 x f32
- nxv16f32 = 126, // n x 16 x f32
- nxv1f64 = 127, // n x 1 x f64
- nxv2f64 = 128, // n x 2 x f64
- nxv4f64 = 129, // n x 4 x f64
- nxv8f64 = 130, // n x 8 x f64
+ nxv2f16 = 126, // n x 2 x f16
+ nxv4f16 = 127, // n x 4 x f16
+ nxv8f16 = 128, // n x 8 x f16
+ nxv2bf16 = 129, // n x 2 x bf16
+ nxv4bf16 = 130, // n x 4 x bf16
+ nxv8bf16 = 131, // n x 8 x bf16
+ nxv1f32 = 132, // n x 1 x f32
+ nxv2f32 = 133, // n x 2 x f32
+ nxv4f32 = 134, // n x 4 x f32
+ nxv8f32 = 135, // n x 8 x f32
+ nxv16f32 = 136, // n x 16 x f32
+ nxv1f64 = 137, // n x 1 x f64
+ nxv2f64 = 138, // n x 2 x f64
+ nxv4f64 = 139, // n x 4 x f64
+ nxv8f64 = 140, // n x 8 x f64
FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv2f16,
LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
@@ -208,20 +218,20 @@ namespace llvm {
FIRST_VECTOR_VALUETYPE = v1i1,
LAST_VECTOR_VALUETYPE = nxv8f64,
- x86mmx = 131, // This is an X86 MMX value
+ x86mmx = 141, // This is an X86 MMX value
- Glue = 132, // This glues nodes together during pre-RA sched
+ Glue = 142, // This glues nodes together during pre-RA sched
- isVoid = 133, // This has no value
+ isVoid = 143, // This has no value
- Untyped = 134, // This value takes a register, but has
+ Untyped = 144, // This value takes a register, but has
// unspecified type. The register class
// will be determined by the opcode.
- exnref = 135, // WebAssembly's exnref type
+ exnref = 145, // WebAssembly's exnref type
FIRST_VALUETYPE = 1, // This is always the beginning of the list.
- LAST_VALUETYPE = 136, // This always remains at the end of the list.
+ LAST_VALUETYPE = 146, // This always remains at the end of the list.
// This is the current maximum for LAST_VALUETYPE.
// MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -333,17 +343,19 @@ namespace llvm {
/// Return true if this is a 32-bit vector type.
bool is32BitVector() const {
- return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 ||
- SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 ||
- SimpleTy == MVT::v2f16 || SimpleTy == MVT::v1f32);
+ return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8 ||
+ SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 ||
+ SimpleTy == MVT::v2f16 || SimpleTy == MVT::v2bf16 ||
+ SimpleTy == MVT::v1f32);
}
/// Return true if this is a 64-bit vector type.
bool is64BitVector() const {
- return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 ||
- SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 ||
- SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 ||
- SimpleTy == MVT::v2f32 || SimpleTy == MVT::v1f64);
+ return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8 ||
+ SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 ||
+ SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 ||
+ SimpleTy == MVT::v4bf16 ||SimpleTy == MVT::v2f32 ||
+ SimpleTy == MVT::v1f64);
}
/// Return true if this is a 128-bit vector type.
@@ -351,24 +363,26 @@ namespace llvm {
return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8 ||
SimpleTy == MVT::v8i16 || SimpleTy == MVT::v4i32 ||
SimpleTy == MVT::v2i64 || SimpleTy == MVT::v1i128 ||
- SimpleTy == MVT::v8f16 || SimpleTy == MVT::v4f32 ||
- SimpleTy == MVT::v2f64);
+ SimpleTy == MVT::v8f16 || SimpleTy == MVT::v8bf16 ||
+ SimpleTy == MVT::v4f32 || SimpleTy == MVT::v2f64);
}
/// Return true if this is a 256-bit vector type.
bool is256BitVector() const {
- return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
- SimpleTy == MVT::v4f64 || SimpleTy == MVT::v32i8 ||
- SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
- SimpleTy == MVT::v4i64 || SimpleTy == MVT::v256i1);
+ return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v16bf16 ||
+ SimpleTy == MVT::v8f32 || SimpleTy == MVT::v4f64 ||
+ SimpleTy == MVT::v32i8 || SimpleTy == MVT::v16i16 ||
+ SimpleTy == MVT::v8i32 || SimpleTy == MVT::v4i64 ||
+ SimpleTy == MVT::v256i1);
}
/// Return true if this is a 512-bit vector type.
bool is512BitVector() const {
- return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
- SimpleTy == MVT::v8f64 || SimpleTy == MVT::v512i1 ||
- SimpleTy == MVT::v64i8 || SimpleTy == MVT::v32i16 ||
- SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
+ return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v32bf16 ||
+ SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64 ||
+ SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8 ||
+ SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32 ||
+ SimpleTy == MVT::v8i64);
}
/// Return true if this is a 1024-bit vector type.
@@ -515,6 +529,15 @@ namespace llvm {
case nxv2f16:
case nxv4f16:
case nxv8f16: return f16;
+ case v2bf16:
+ case v3bf16:
+ case v4bf16:
+ case v8bf16:
+ case v16bf16:
+ case v32bf16:
+ case nxv2bf16:
+ case nxv4bf16:
+ case nxv8bf16: return bf16;
case v1f32:
case v2f32:
case v3f32:
@@ -578,6 +601,7 @@ namespace llvm {
case v32i32:
case v32i64:
case v32f16:
+ case v32bf16:
case v32f32:
case nxv32i1:
case nxv32i8:
@@ -590,6 +614,7 @@ namespace llvm {
case v16i32:
case v16i64:
case v16f16:
+ case v16bf16:
case v16f32:
case v16f64:
case nxv16i1:
@@ -604,6 +629,7 @@ namespace llvm {
case v8i32:
case v8i64:
case v8f16:
+ case v8bf16:
case v8f32:
case v8f64:
case nxv8i1:
@@ -612,6 +638,7 @@ namespace llvm {
case nxv8i32:
case nxv8i64:
case nxv8f16:
+ case nxv8bf16:
case nxv8f32:
case nxv8f64: return 8;
case v5i32:
@@ -622,6 +649,7 @@ namespace llvm {
case v4i32:
case v4i64:
case v4f16:
+ case v4bf16:
case v4f32:
case v4f64:
case nxv4i1:
@@ -630,11 +658,13 @@ namespace llvm {
case nxv4i32:
case nxv4i64:
case nxv4f16:
+ case nxv4bf16:
case nxv4f32:
case nxv4f64: return 4;
case v3i16:
case v3i32:
case v3f16:
+ case v3bf16:
case v3f32: return 3;
case v2i1:
case v2i8:
@@ -642,6 +672,7 @@ namespace llvm {
case v2i32:
case v2i64:
case v2f16:
+ case v2bf16:
case v2f32:
case v2f64:
case nxv2i1:
@@ -650,6 +681,7 @@ namespace llvm {
case nxv2i32:
case nxv2i64:
case nxv2f16:
+ case nxv2bf16:
case nxv2f32:
case nxv2f64: return 2;
case v1i1:
@@ -712,6 +744,7 @@ namespace llvm {
case nxv8i1: return TypeSize::Scalable(8);
case i16 :
case f16:
+ case bf16:
case v16i1:
case v2i8:
case v1i16: return TypeSize::Fixed(16);
@@ -724,6 +757,7 @@ namespace llvm {
case v4i8:
case v2i16:
case v2f16:
+ case v2bf16:
case v1f32:
case v1i32: return TypeSize::Fixed(32);
case nxv32i1:
@@ -731,9 +765,11 @@ namespace llvm {
case nxv2i16:
case nxv1i32:
case nxv2f16:
+ case nxv2bf16:
case nxv1f32: return TypeSize::Scalable(32);
case v3i16:
- case v3f16: return TypeSize::Fixed(48);
+ case v3f16:
+ case v3bf16: return TypeSize::Fixed(48);
case x86mmx:
case f64 :
case i64 :
@@ -743,6 +779,7 @@ namespace llvm {
case v2i32:
case v1i64:
case v4f16:
+ case v4bf16:
case v2f32:
case v1f64: return TypeSize::Fixed(64);
case nxv8i8:
@@ -750,6 +787,7 @@ namespace llvm {
case nxv2i32:
case nxv1i64:
case nxv4f16:
+ case nxv4bf16:
case nxv2f32:
case nxv1f64: return TypeSize::Scalable(64);
case f80 : return TypeSize::Fixed(80);
@@ -765,6 +803,7 @@ namespace llvm {
case v2i64:
case v1i128:
case v8f16:
+ case v8bf16:
case v4f32:
case v2f64: return TypeSize::Fixed(128);
case nxv16i8:
@@ -772,6 +811,7 @@ namespace llvm {
case nxv4i32:
case nxv2i64:
case nxv8f16:
+ case nxv8bf16:
case nxv4f32:
case nxv2f64: return TypeSize::Scalable(128);
case v5i32:
@@ -782,6 +822,7 @@ namespace llvm {
case v8i32:
case v4i64:
case v16f16:
+ case v16bf16:
case v8f32:
case v4f64: return TypeSize::Fixed(256);
case nxv32i8:
@@ -796,6 +837,7 @@ namespace llvm {
case v16i32:
case v8i64:
case v32f16:
+ case v32bf16:
case v16f32:
case v8f64: return TypeSize::Fixed(512);
case nxv32i16:
@@ -993,6 +1035,14 @@ namespace llvm {
if (NumElements == 16) return MVT::v16f16;
if (NumElements == 32) return MVT::v32f16;
break;
+ case MVT::bf16:
+ if (NumElements == 2) return MVT::v2bf16;
+ if (NumElements == 3) return MVT::v3bf16;
+ if (NumElements == 4) return MVT::v4bf16;
+ if (NumElements == 8) return MVT::v8bf16;
+ if (NumElements == 16) return MVT::v16bf16;
+ if (NumElements == 32) return MVT::v32bf16;
+ break;
case MVT::f32:
if (NumElements == 1) return MVT::v1f32;
if (NumElements == 2) return MVT::v2f32;
@@ -1069,6 +1119,11 @@ namespace llvm {
if (NumElements == 4) return MVT::nxv4f16;
if (NumElements == 8) return MVT::nxv8f16;
break;
+ case MVT::bf16:
+ if (NumElements == 2) return MVT::nxv2bf16;
+ if (NumElements == 4) return MVT::nxv4bf16;
+ if (NumElements == 8) return MVT::nxv8bf16;
+ break;
case MVT::f32:
if (NumElements == 1) return MVT::nxv1f32;
if (NumElements == 2) return MVT::nxv2f32;
diff --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index e24ad844a62c..2b97e9d83dd0 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -147,6 +147,7 @@ std::string EVT::getEVTString() const {
if (isFloatingPoint())
return "f" + utostr(getSizeInBits());
llvm_unreachable("Invalid EVT!");
+ case MVT::bf16: return "bf16";
case MVT::ppcf128: return "ppcf128";
case MVT::isVoid: return "isVoid";
case MVT::Other: return "ch";
@@ -174,6 +175,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::i64: return Type::getInt64Ty(Context);
case MVT::i128: return IntegerType::get(Context, 128);
case MVT::f16: return Type::getHalfTy(Context);
+ case MVT::bf16: return Type::getBFloatTy(Context);
case MVT::f32: return Type::getFloatTy(Context);
case MVT::f64: return Type::getDoubleTy(Context);
case MVT::f80: return Type::getX86_FP80Ty(Context);
@@ -236,6 +238,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8);
case MVT::v16f16: return VectorType::get(Type::getHalfTy(Context), 16);
case MVT::v32f16: return VectorType::get(Type::getHalfTy(Context), 32);
+ case MVT::v2bf16: return VectorType::get(Type::getBFloatTy(Context), 2);
+ case MVT::v3bf16: return VectorType::get(Type::getBFloatTy(Context), 3);
+ case MVT::v4bf16: return VectorType::get(Type::getBFloatTy(Context), 4);
+ case MVT::v8bf16: return VectorType::get(Type::getBFloatTy(Context), 8);
+ case MVT::v16bf16: return VectorType::get(Type::getBFloatTy(Context), 16);
+ case MVT::v32bf16: return VectorType::get(Type::getBFloatTy(Context), 32);
case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1);
case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2);
case MVT::v3f32: return VectorType::get(Type::getFloatTy(Context), 3);
@@ -321,6 +329,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true);
case MVT::nxv8f16:
return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true);
+ case MVT::nxv2bf16:
+ return VectorType::get(Type::getBFloatTy(Context), 2, /*Scalable=*/ true);
+ case MVT::nxv4bf16:
+ return VectorType::get(Type::getBFloatTy(Context), 4, /*Scalable=*/ true);
+ case MVT::nxv8bf16:
+ return VectorType::get(Type::getBFloatTy(Context), 8, /*Scalable=*/ true);
case MVT::nxv1f32:
return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true);
case MVT::nxv2f32:
@@ -356,6 +370,7 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
case Type::IntegerTyID:
return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
case Type::HalfTyID: return MVT(MVT::f16);
+ case Type::BFloatTyID: return MVT(MVT::bf16);
case Type::FloatTyID: return MVT(MVT::f32);
case Type::DoubleTyID: return MVT(MVT::f64);
case Type::X86_FP80TyID: return MVT(MVT::f80);
diff --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index e0470e4266f8..282e62cf838e 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -69,6 +69,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::fAny: return "MVT::fAny";
case MVT::vAny: return "MVT::vAny";
case MVT::f16: return "MVT::f16";
+ case MVT::bf16: return "MVT::bf16";
case MVT::f32: return "MVT::f32";
case MVT::f64: return "MVT::f64";
case MVT::f80: return "MVT::f80";
@@ -132,6 +133,12 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::v8f16: return "MVT::v8f16";
case MVT::v16f16: return "MVT::v16f16";
case MVT::v32f16: return "MVT::v32f16";
+ case MVT::v2bf16: return "MVT::v2bf16";
+ case MVT::v3bf16: return "MVT::v3bf16";
+ case MVT::v4bf16: return "MVT::v4bf16";
+ case MVT::v8bf16: return "MVT::v8bf16";
+ case MVT::v16bf16: return "MVT::v16bf16";
+ case MVT::v32bf16: return "MVT::v32bf16";
case MVT::v1f32: return "MVT::v1f32";
case MVT::v2f32: return "MVT::v2f32";
case MVT::v3f32: return "MVT::v3f32";
@@ -182,6 +189,9 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
case MVT::nxv2f16: return "MVT::nxv2f16";
case MVT::nxv4f16: return "MVT::nxv4f16";
case MVT::nxv8f16: return "MVT::nxv8f16";
+ case MVT::nxv2bf16: return "MVT::nxv2bf16";
+ case MVT::nxv4bf16: return "MVT::nxv4bf16";
+ case MVT::nxv8bf16: return "MVT::nxv8bf16";
case MVT::nxv1f32: return "MVT::nxv1f32";
case MVT::nxv2f32: return "MVT::nxv2f32";
case MVT::nxv4f32: return "MVT::nxv4f32";
More information about the llvm-commits mailing list