[llvm] 0508fb4 - [CodeGen][BFloat] Add bfloat MVT type

Wed May 27 05:38:32 PDT 2020

Author: Ties Stuij
Date: 2020-05-27T13:38:12+01:00
New Revision: 0508fb45dfbc3ffde6bacc1e52177f3972a3eb99

URL: https://github.com/llvm/llvm-project/commit/0508fb45dfbc3ffde6bacc1e52177f3972a3eb99
DIFF: https://github.com/llvm/llvm-project/commit/0508fb45dfbc3ffde6bacc1e52177f3972a3eb99.diff

LOG: [CodeGen][BFloat] Add bfloat MVT type

Summary:
This patch adds BFloat MVT support. It also adds fixed and scalable vector MVT
types for BFloat.

This patch is part of a series that adds support for the BFloat16 extension of
the Armv8.6-a architecture, as detailed here:

https://community.arm.com/developer/ip-products/processors/b/processors-ip-blog/posts/arm-architecture-developments-armv8-6-a

The bfloat type and its properties are specified in the Arm Architecture
Reference Manual:

https://developer.arm.com/docs/ddi0487/latest/arm-architecture-reference-manual-armv8-for-armv8-a-architecture-profile

Reviewers: aemerson, huntergr, craig.topper, fpetrogalli, sdesmalen, LukeGeeson, ostannard

Reviewed By: ostannard

Subscribers: LukeGeeson, pbarrio, dschuff, kristof.beyls, hiraditya, aheejin, jdoerfert, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D79706

Added: 
    

Modified: 
    llvm/include/llvm/CodeGen/SelectionDAG.h
    llvm/include/llvm/CodeGen/ValueTypes.td
    llvm/include/llvm/IR/Intrinsics.td
    llvm/include/llvm/Support/MachineValueType.h
    llvm/lib/CodeGen/ValueTypes.cpp
    llvm/utils/TableGen/CodeGenTarget.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h
index 8b794d98d81f..462d9f91c4f1 100644

--- a/llvm/include/llvm/CodeGen/SelectionDAG.h
+++ b/llvm/include/llvm/CodeGen/SelectionDAG.h
@@ -1547,6 +1547,7 @@ class SelectionDAG {
     switch (VT.getScalarType().getSimpleVT().SimpleTy) {
     default: llvm_unreachable("Unknown FP format");
     case MVT::f16:     return APFloat::IEEEhalf();
+    case MVT::bf16:    return APFloat::BFloat();
     case MVT::f32:     return APFloat::IEEEsingle();
     case MVT::f64:     return APFloat::IEEEdouble();
     case MVT::f80:     return APFloat::x87DoubleExtended();

diff  --git a/llvm/include/llvm/CodeGen/ValueTypes.td b/llvm/include/llvm/CodeGen/ValueTypes.td
index 2ec0ed7ce3bd..e08a33a50df6 100644
--- a/llvm/include/llvm/CodeGen/ValueTypes.td
+++ b/llvm/include/llvm/CodeGen/ValueTypes.td
@@ -25,148 +25,159 @@ def i16    : ValueType<16 ,  4>;   // 16-bit integer value
 def i32    : ValueType<32 ,  5>;   // 32-bit integer value
 def i64    : ValueType<64 ,  6>;   // 64-bit integer value
 def i128   : ValueType<128,  7>;   // 128-bit integer value
-def f16    : ValueType<16 ,  8>;   // 16-bit floating point value
-def f32    : ValueType<32 ,  9>;   // 32-bit floating point value
-def f64    : ValueType<64 , 10>;   // 64-bit floating point value
-def f80    : ValueType<80 , 11>;   // 80-bit floating point value
-def f128   : ValueType<128, 12>;   // 128-bit floating point value
-def ppcf128: ValueType<128, 13>;   // PPC 128-bit floating point value
-
-def v1i1   : ValueType<1 ,  14>;   //   1 x i1 vector value
-def v2i1   : ValueType<2 ,  15>;   //   2 x i1 vector value
-def v4i1   : ValueType<4 ,  16>;   //   4 x i1 vector value
-def v8i1   : ValueType<8 ,  17>;   //   8 x i1 vector value
-def v16i1  : ValueType<16,  18>;   //  16 x i1 vector value
-def v32i1  : ValueType<32 , 19>;   //  32 x i1 vector value
-def v64i1  : ValueType<64 , 20>;   //  64 x i1 vector value
-def v128i1 : ValueType<128, 21>;   // 128 x i1 vector value
-def v256i1 : ValueType<256, 22>;   // 256 x i1 vector value
-def v512i1 : ValueType<512, 23>;   // 512 x i1 vector value
-def v1024i1: ValueType<1024,24>;   //1024 x i1 vector value
-
-def v1i8   : ValueType<8,   25>;   //  1 x i8  vector value
-def v2i8   : ValueType<16 , 26>;   //  2 x i8  vector value
-def v4i8   : ValueType<32 , 27>;   //  4 x i8  vector value
-def v8i8   : ValueType<64 , 28>;   //  8 x i8  vector value
-def v16i8  : ValueType<128, 29>;   // 16 x i8  vector value
-def v32i8  : ValueType<256, 30>;   // 32 x i8  vector value
-def v64i8  : ValueType<512, 31>;   // 64 x i8  vector value
-def v128i8 : ValueType<1024,32>;   //128 x i8  vector value
-def v256i8 : ValueType<2048,33>;   //256 x i8  vector value
-
-def v1i16  : ValueType<16 , 34>;   //  1 x i16 vector value
-def v2i16  : ValueType<32 , 35>;   //  2 x i16 vector value
-def v3i16  : ValueType<48 , 36>;   //  3 x i16 vector value
-def v4i16  : ValueType<64 , 37>;   //  4 x i16 vector value
-def v8i16  : ValueType<128, 38>;   //  8 x i16 vector value
-def v16i16 : ValueType<256, 39>;   // 16 x i16 vector value
-def v32i16 : ValueType<512, 40>;   // 32 x i16 vector value
-def v64i16 : ValueType<1024,41>;   // 64 x i16 vector value
-def v128i16: ValueType<2048,42>;   //128 x i16 vector value
-
-def v1i32    : ValueType<32 , 43>;   //  1 x i32 vector value
-def v2i32    : ValueType<64 , 44>;   //  2 x i32 vector value
-def v3i32    : ValueType<96 , 45>;   //  3 x i32 vector value
-def v4i32    : ValueType<128, 46>;   //  4 x i32 vector value
-def v5i32    : ValueType<160, 47>;   //  5 x i32 vector value
-def v8i32    : ValueType<256, 48>;   //  8 x i32 vector value
-def v16i32   : ValueType<512, 49>;   // 16 x i32 vector value
-def v32i32   : ValueType<1024,50>;   // 32 x i32 vector value
-def v64i32   : ValueType<2048,51>;   // 64 x i32 vector value
-def v128i32  : ValueType<4096,52>;   // 128 x i32 vector value
-def v256i32  : ValueType<8182,53>;   // 256 x i32 vector value
-def v512i32  : ValueType<16384,54>;  // 512 x i32 vector value
-def v1024i32 : ValueType<32768,55>;  // 1024 x i32 vector value
-def v2048i32 : ValueType<65536,56>;  // 2048 x i32 vector value
-
-def v1i64  : ValueType<64 , 57>;   //  1 x i64 vector value
-def v2i64  : ValueType<128, 58>;   //  2 x i64 vector value
-def v4i64  : ValueType<256, 59>;   //  4 x i64 vector value
-def v8i64  : ValueType<512, 60>;   //  8 x i64 vector value
-def v16i64 : ValueType<1024,61>;   // 16 x i64 vector value
-def v32i64 : ValueType<2048,62>;   // 32 x i64 vector value
-
-def v1i128 : ValueType<128, 63>;   //  1 x i128 vector value
-
-def v2f16    : ValueType<32 , 64>;    //    2 x f16 vector value
-def v3f16    : ValueType<48 , 65>;    //    3 x f16 vector value
-def v4f16    : ValueType<64 , 66>;    //    4 x f16 vector value
-def v8f16    : ValueType<128, 67>;    //    8 x f16 vector value
-def v16f16   : ValueType<256, 68>;    //    8 x f16 vector value
-def v32f16   : ValueType<512, 69>;    //    8 x f16 vector value
-def v1f32    : ValueType<32 , 70>;    //    1 x f32 vector value
-def v2f32    : ValueType<64 , 71>;    //    2 x f32 vector value
-def v3f32    : ValueType<96 , 72>;    //    3 x f32 vector value
-def v4f32    : ValueType<128, 73>;    //    4 x f32 vector value
-def v5f32    : ValueType<160, 74>;    //    5 x f32 vector value
-def v8f32    : ValueType<256, 75>;    //    8 x f32 vector value
-def v16f32   : ValueType<512,  76>;   //   16 x f32 vector value
-def v32f32   : ValueType<1024, 77>;   //   32 x f32 vector value
-def v64f32   : ValueType<2048, 78>;   //   64 x f32 vector value
-def v128f32  : ValueType<4096, 79>;   //  128 x f32 vector value
-def v256f32  : ValueType<8182, 80>;   //  256 x f32 vector value
-def v512f32  : ValueType<16384, 81>;  //  512 x f32 vector value
-def v1024f32 : ValueType<32768, 82>;  // 1024 x f32 vector value
-def v2048f32 : ValueType<65536, 83>;  // 2048 x f32 vector value
-def v1f64    : ValueType<64, 84>;     //    1 x f64 vector value
-def v2f64    : ValueType<128, 85>;    //    2 x f64 vector value
-def v4f64    : ValueType<256, 86>;    //    4 x f64 vector value
-def v8f64    : ValueType<512, 87>;    //    8 x f64 vector value
-def v16f64   : ValueType<1024, 88>;   //   16 x f64 vector value
-
-def nxv1i1  : ValueType<1,   89>;  // n x  1 x i1  vector value
-def nxv2i1  : ValueType<2,   90>;  // n x  2 x i1  vector value
-def nxv4i1  : ValueType<4,   91>;  // n x  4 x i1  vector value
-def nxv8i1  : ValueType<8,   92>;  // n x  8 x i1  vector value
-def nxv16i1 : ValueType<16,  93>;  // n x 16 x i1  vector value
-def nxv32i1 : ValueType<32,  94>;  // n x 32 x i1  vector value
-
-def nxv1i8  : ValueType<8,    95>;  // n x  1 x i8  vector value
-def nxv2i8  : ValueType<16,   96>;  // n x  2 x i8  vector value
-def nxv4i8  : ValueType<32,   97>;  // n x  4 x i8  vector value
-def nxv8i8  : ValueType<64,   98>;  // n x  8 x i8  vector value
-def nxv16i8 : ValueType<128,  99>;  // n x 16 x i8  vector value
-def nxv32i8 : ValueType<256, 100>;  // n x 32 x i8  vector value
-
-def nxv1i16 : ValueType<16,  101>; // n x  1 x i16 vector value
-def nxv2i16 : ValueType<32,  102>; // n x  2 x i16 vector value
-def nxv4i16 : ValueType<64,  103>; // n x  4 x i16 vector value
-def nxv8i16 : ValueType<128, 104>; // n x  8 x i16 vector value
-def nxv16i16: ValueType<256, 105>; // n x 16 x i16 vector value
-def nxv32i16: ValueType<512, 106>; // n x 32 x i16 vector value
-
-def nxv1i32 : ValueType<32,  107>; // n x  1 x i32 vector value
-def nxv2i32 : ValueType<64,  108>; // n x  2 x i32 vector value
-def nxv4i32 : ValueType<128, 109>; // n x  4 x i32 vector value
-def nxv8i32 : ValueType<256, 110>; // n x  8 x i32 vector value
-def nxv16i32: ValueType<512, 111>; // n x 16 x i32 vector value
-def nxv32i32: ValueType<1024,112>; // n x 32 x i32 vector value
-
-def nxv1i64 : ValueType<64,  113>; // n x  1 x i64 vector value
-def nxv2i64 : ValueType<128, 114>; // n x  2 x i64 vector value
-def nxv4i64 : ValueType<256, 115>; // n x  4 x i64 vector value
-def nxv8i64 : ValueType<512, 116>; // n x  8 x i64 vector value
-def nxv16i64: ValueType<1024,117>; // n x 16 x i64 vector value
-def nxv32i64: ValueType<2048,118>; // n x 32 x i64 vector value
-
-def nxv2f16  : ValueType<32 , 119>; // n x  2 x f16 vector value
-def nxv4f16  : ValueType<64 , 120>; // n x  4 x f16 vector value
-def nxv8f16  : ValueType<128, 121>; // n x  8 x f16 vector value
-def nxv1f32  : ValueType<32 , 122>; // n x  1 x f32 vector value
-def nxv2f32  : ValueType<64 , 123>; // n x  2 x f32 vector value
-def nxv4f32  : ValueType<128, 124>; // n x  4 x f32 vector value
-def nxv8f32  : ValueType<256, 125>; // n x  8 x f32 vector value
-def nxv16f32 : ValueType<512, 126>; // n x 16 x f32 vector value
-def nxv1f64  : ValueType<64,  127>; // n x  1 x f64 vector value
-def nxv2f64  : ValueType<128, 128>; // n x  2 x f64 vector value
-def nxv4f64  : ValueType<256, 129>; // n x  4 x f64 vector value
-def nxv8f64  : ValueType<512, 130>; // n x  8 x f64 vector value
-
-def x86mmx : ValueType<64 , 131>;   // X86 MMX value
-def FlagVT : ValueType<0  , 132>;   // Pre-RA sched glue
-def isVoid : ValueType<0  , 133>;   // Produces no value
-def untyped: ValueType<8  , 134>;   // Produces an untyped value
-def exnref : ValueType<0  , 135>;   // WebAssembly's exnref type
+
+def bf16   : ValueType<16 ,  8>;   // 16-bit brain floating point value
+def f16    : ValueType<16 ,  9>;   // 16-bit floating point value
+def f32    : ValueType<32 , 10>;   // 32-bit floating point value
+def f64    : ValueType<64 , 11>;   // 64-bit floating point value
+def f80    : ValueType<80 , 12>;   // 80-bit floating point value
+def f128   : ValueType<128, 13>;   // 128-bit floating point value
+def ppcf128: ValueType<128, 14>;   // PPC 128-bit floating point value
+
+def v1i1   : ValueType<1 ,  15>;   //   1 x i1 vector value
+def v2i1   : ValueType<2 ,  16>;   //   2 x i1 vector value
+def v4i1   : ValueType<4 ,  17>;   //   4 x i1 vector value
+def v8i1   : ValueType<8 ,  18>;   //   8 x i1 vector value
+def v16i1  : ValueType<16,  19>;   //  16 x i1 vector value
+def v32i1  : ValueType<32 , 20>;   //  32 x i1 vector value
+def v64i1  : ValueType<64 , 21>;   //  64 x i1 vector value
+def v128i1 : ValueType<128, 22>;   // 128 x i1 vector value
+def v256i1 : ValueType<256, 23>;   // 256 x i1 vector value
+def v512i1 : ValueType<512, 24>;   // 512 x i1 vector value
+def v1024i1: ValueType<1024,25>;   //1024 x i1 vector value
+
+def v1i8   : ValueType<8,   26>;   //  1 x i8  vector value
+def v2i8   : ValueType<16 , 27>;   //  2 x i8  vector value
+def v4i8   : ValueType<32 , 28>;   //  4 x i8  vector value
+def v8i8   : ValueType<64 , 29>;   //  8 x i8  vector value
+def v16i8  : ValueType<128, 30>;   // 16 x i8  vector value
+def v32i8  : ValueType<256, 31>;   // 32 x i8  vector value
+def v64i8  : ValueType<512, 32>;   // 64 x i8  vector value
+def v128i8 : ValueType<1024,33>;   //128 x i8  vector value
+def v256i8 : ValueType<2048,34>;   //256 x i8  vector value
+
+def v1i16  : ValueType<16 , 35>;   //  1 x i16 vector value
+def v2i16  : ValueType<32 , 36>;   //  2 x i16 vector value
+def v3i16  : ValueType<48 , 37>;   //  3 x i16 vector value
+def v4i16  : ValueType<64 , 38>;   //  4 x i16 vector value
+def v8i16  : ValueType<128, 39>;   //  8 x i16 vector value
+def v16i16 : ValueType<256, 40>;   // 16 x i16 vector value
+def v32i16 : ValueType<512, 41>;   // 32 x i16 vector value
+def v64i16 : ValueType<1024,42>;   // 64 x i16 vector value
+def v128i16: ValueType<2048,43>;   //128 x i16 vector value
+
+def v1i32    : ValueType<32 , 44>;   //  1 x i32 vector value
+def v2i32    : ValueType<64 , 45>;   //  2 x i32 vector value
+def v3i32    : ValueType<96 , 46>;   //  3 x i32 vector value
+def v4i32    : ValueType<128, 47>;   //  4 x i32 vector value
+def v5i32    : ValueType<160, 48>;   //  5 x i32 vector value
+def v8i32    : ValueType<256, 49>;   //  8 x i32 vector value
+def v16i32   : ValueType<512, 50>;   // 16 x i32 vector value
+def v32i32   : ValueType<1024,51>;   // 32 x i32 vector value
+def v64i32   : ValueType<2048,52>;   // 64 x i32 vector value
+def v128i32  : ValueType<4096,53>;   // 128 x i32 vector value
+def v256i32  : ValueType<8182,54>;   // 256 x i32 vector value
+def v512i32  : ValueType<16384,55>;  // 512 x i32 vector value
+def v1024i32 : ValueType<32768,56>;  // 1024 x i32 vector value
+def v2048i32 : ValueType<65536,57>;  // 2048 x i32 vector value
+
+def v1i64  : ValueType<64 , 58>;   //  1 x i64 vector value
+def v2i64  : ValueType<128, 59>;   //  2 x i64 vector value
+def v4i64  : ValueType<256, 60>;   //  4 x i64 vector value
+def v8i64  : ValueType<512, 61>;   //  8 x i64 vector value
+def v16i64 : ValueType<1024,62>;   // 16 x i64 vector value
+def v32i64 : ValueType<2048,63>;   // 32 x i64 vector value
+
+def v1i128 : ValueType<128, 64>;   //  1 x i128 vector value
+
+def v2f16    : ValueType<32 , 65>;    //    2 x f16 vector value
+def v3f16    : ValueType<48 , 66>;    //    3 x f16 vector value
+def v4f16    : ValueType<64 , 67>;    //    4 x f16 vector value
+def v8f16    : ValueType<128, 68>;    //    8 x f16 vector value
+def v16f16   : ValueType<256, 69>;    //   16 x f16 vector value
+def v32f16   : ValueType<512, 70>;    //   32 x f16 vector value
+def v2bf16   : ValueType<32 , 71>;    //    2 x bf16 vector value
+def v3bf16   : ValueType<48 , 72>;    //    3 x bf16 vector value
+def v4bf16   : ValueType<64 , 73>;    //    4 x bf16 vector value
+def v8bf16   : ValueType<128, 74>;    //    8 x bf16 vector value
+def v16bf16  : ValueType<256, 75>;    //   16 x bf16 vector value
+def v32bf16  : ValueType<512, 76>;    //   32 x bf16 vector value
+def v1f32    : ValueType<32 , 77>;    //    1 x f32 vector value
+def v2f32    : ValueType<64 , 78>;    //    2 x f32 vector value
+def v3f32    : ValueType<96 , 79>;    //    3 x f32 vector value
+def v4f32    : ValueType<128, 80>;    //    4 x f32 vector value
+def v5f32    : ValueType<160, 81>;    //    5 x f32 vector value
+def v8f32    : ValueType<256, 82>;    //    8 x f32 vector value
+def v16f32   : ValueType<512,  83>;   //   16 x f32 vector value
+def v32f32   : ValueType<1024, 84>;   //   32 x f32 vector value
+def v64f32   : ValueType<2048, 85>;   //   64 x f32 vector value
+def v128f32  : ValueType<4096, 86>;   //  128 x f32 vector value
+def v256f32  : ValueType<8182, 87>;   //  256 x f32 vector value
+def v512f32  : ValueType<16384, 88>;  //  512 x f32 vector value
+def v1024f32 : ValueType<32768, 89>;  // 1024 x f32 vector value
+def v2048f32 : ValueType<65536, 90>;  // 2048 x f32 vector value
+def v1f64    : ValueType<64, 91>;     //    1 x f64 vector value
+def v2f64    : ValueType<128, 92>;    //    2 x f64 vector value
+def v4f64    : ValueType<256, 93>;    //    4 x f64 vector value
+def v8f64    : ValueType<512, 94>;    //    8 x f64 vector value
+def v16f64   : ValueType<1024, 95>;   //   16 x f64 vector value
+
+def nxv1i1  : ValueType<1,   96>;  // n x  1 x i1  vector value
+def nxv2i1  : ValueType<2,   97>;  // n x  2 x i1  vector value
+def nxv4i1  : ValueType<4,   98>;  // n x  4 x i1  vector value
+def nxv8i1  : ValueType<8,   99>;  // n x  8 x i1  vector value
+def nxv16i1 : ValueType<16, 100>;  // n x 16 x i1  vector value
+def nxv32i1 : ValueType<32, 101>;  // n x 32 x i1  vector value
+
+def nxv1i8  : ValueType<8,   102>;  // n x  1 x i8  vector value
+def nxv2i8  : ValueType<16,  103>;  // n x  2 x i8  vector value
+def nxv4i8  : ValueType<32,  104>;  // n x  4 x i8  vector value
+def nxv8i8  : ValueType<64,  105>;  // n x  8 x i8  vector value
+def nxv16i8 : ValueType<128, 106>;  // n x 16 x i8  vector value
+def nxv32i8 : ValueType<256, 107>;  // n x 32 x i8  vector value
+
+def nxv1i16 : ValueType<16,  108>; // n x  1 x i16 vector value
+def nxv2i16 : ValueType<32,  109>; // n x  2 x i16 vector value
+def nxv4i16 : ValueType<64,  110>; // n x  4 x i16 vector value
+def nxv8i16 : ValueType<128, 111>; // n x  8 x i16 vector value
+def nxv16i16: ValueType<256, 112>; // n x 16 x i16 vector value
+def nxv32i16: ValueType<512, 113>; // n x 32 x i16 vector value
+
+def nxv1i32 : ValueType<32,  114>; // n x  1 x i32 vector value
+def nxv2i32 : ValueType<64,  115>; // n x  2 x i32 vector value
+def nxv4i32 : ValueType<128, 116>; // n x  4 x i32 vector value
+def nxv8i32 : ValueType<256, 117>; // n x  8 x i32 vector value
+def nxv16i32: ValueType<512, 118>; // n x 16 x i32 vector value
+def nxv32i32: ValueType<1024,119>; // n x 32 x i32 vector value
+
+def nxv1i64 : ValueType<64,  120>; // n x  1 x i64 vector value
+def nxv2i64 : ValueType<128, 121>; // n x  2 x i64 vector value
+def nxv4i64 : ValueType<256, 122>; // n x  4 x i64 vector value
+def nxv8i64 : ValueType<512, 123>; // n x  8 x i64 vector value
+def nxv16i64: ValueType<1024,124>; // n x 16 x i64 vector value
+def nxv32i64: ValueType<2048,125>; // n x 32 x i64 vector value
+
+def nxv2f16  : ValueType<32 , 126>; // n x  2 x f16 vector value
+def nxv4f16  : ValueType<64 , 127>; // n x  4 x f16 vector value
+def nxv8f16  : ValueType<128, 128>; // n x  8 x f16 vector value
+def nxv2bf16 : ValueType<32 , 129>; // n x  2 x bf16 vector value
+def nxv4bf16 : ValueType<64 , 130>; // n x  4 x bf16 vector value
+def nxv8bf16 : ValueType<128, 131>; // n x  8 x bf16 vector value
+def nxv1f32  : ValueType<32 , 132>; // n x  1 x f32 vector value
+def nxv2f32  : ValueType<64 , 133>; // n x  2 x f32 vector value
+def nxv4f32  : ValueType<128, 134>; // n x  4 x f32 vector value
+def nxv8f32  : ValueType<256, 135>; // n x  8 x f32 vector value
+def nxv16f32 : ValueType<512, 136>; // n x 16 x f32 vector value
+def nxv1f64  : ValueType<64,  137>; // n x  1 x f64 vector value
+def nxv2f64  : ValueType<128, 138>; // n x  2 x f64 vector value
+def nxv4f64  : ValueType<256, 139>; // n x  4 x f64 vector value
+def nxv8f64  : ValueType<512, 140>; // n x  8 x f64 vector value
+
+def x86mmx : ValueType<64 , 141>;   // X86 MMX value
+def FlagVT : ValueType<0  , 142>;   // Pre-RA sched glue
+def isVoid : ValueType<0  , 143>;   // Produces no value
+def untyped: ValueType<8  , 144>;   // Produces an untyped value
+def exnref : ValueType<0  , 145>;      // WebAssembly's exnref type
 def token  : ValueType<0  , 248>;   // TokenTy
 def MetadataVT: ValueType<0, 249>;  // Metadata
 

diff  --git a/llvm/include/llvm/IR/Intrinsics.td b/llvm/include/llvm/IR/Intrinsics.td
index 7bfb25b0ed7d..33961767e1c0 100644
--- a/llvm/include/llvm/IR/Intrinsics.td
+++ b/llvm/include/llvm/IR/Intrinsics.td
@@ -214,6 +214,7 @@ def llvm_i16_ty        : LLVMType<i16>;
 def llvm_i32_ty        : LLVMType<i32>;
 def llvm_i64_ty        : LLVMType<i64>;
 def llvm_half_ty       : LLVMType<f16>;
+def llvm_bfloat_ty     : LLVMType<bf16>;
 def llvm_float_ty      : LLVMType<f32>;
 def llvm_double_ty     : LLVMType<f64>;
 def llvm_f80_ty        : LLVMType<f80>;

diff  --git a/llvm/include/llvm/Support/MachineValueType.h b/llvm/include/llvm/Support/MachineValueType.h
index 224353c5047f..93683eb7dcf7 100644
--- a/llvm/include/llvm/Support/MachineValueType.h
+++ b/llvm/include/llvm/Support/MachineValueType.h
@@ -47,100 +47,107 @@ namespace llvm {
       FIRST_INTEGER_VALUETYPE = i1,
       LAST_INTEGER_VALUETYPE  = i128,
 
-      f16            =   8,   // This is a 16 bit floating point value
-      f32            =   9,   // This is a 32 bit floating point value
-      f64            =  10,   // This is a 64 bit floating point value
-      f80            =  11,   // This is a 80 bit floating point value
-      f128           =  12,   // This is a 128 bit floating point value
-      ppcf128        =  13,   // This is a PPC 128-bit floating point value
-
-      FIRST_FP_VALUETYPE = f16,
+      bf16           =   8,   // This is a 16 bit brain floating point value
+      f16            =   9,   // This is a 16 bit floating point value
+      f32            =  10,   // This is a 32 bit floating point value
+      f64            =  11,   // This is a 64 bit floating point value
+      f80            =  12,   // This is a 80 bit floating point value
+      f128           =  13,   // This is a 128 bit floating point value
+      ppcf128        =  14,   // This is a PPC 128-bit floating point value
+
+      FIRST_FP_VALUETYPE = bf16,
       LAST_FP_VALUETYPE  = ppcf128,
 
-      v1i1           =  14,   //    1 x i1
-      v2i1           =  15,   //    2 x i1
-      v4i1           =  16,   //    4 x i1
-      v8i1           =  17,   //    8 x i1
-      v16i1          =  18,   //   16 x i1
-      v32i1          =  19,   //   32 x i1
-      v64i1          =  20,   //   64 x i1
-      v128i1         =  21,   //  128 x i1
-      v256i1         =  22,   //  256 x i1
-      v512i1         =  23,   //  512 x i1
-      v1024i1        =  24,   // 1024 x i1
-
-      v1i8           =  25,   //  1 x i8
-      v2i8           =  26,   //  2 x i8
-      v4i8           =  27,   //  4 x i8
-      v8i8           =  28,   //  8 x i8
-      v16i8          =  29,   // 16 x i8
-      v32i8          =  30,   // 32 x i8
-      v64i8          =  31,   // 64 x i8
-      v128i8         =  32,   //128 x i8
-      v256i8         =  33,   //256 x i8
-
-      v1i16          =  34,   //  1 x i16
-      v2i16          =  35,   //  2 x i16
-      v3i16          =  36,   //  3 x i16
-      v4i16          =  37,   //  4 x i16
-      v8i16          =  38,   //  8 x i16
-      v16i16         =  39,   // 16 x i16
-      v32i16         =  40,   // 32 x i16
-      v64i16         =  41,   // 64 x i16
-      v128i16        =  42,   //128 x i16
-
-      v1i32          =  43,   //    1 x i32
-      v2i32          =  44,   //    2 x i32
-      v3i32          =  45,   //    3 x i32
-      v4i32          =  46,   //    4 x i32
-      v5i32          =  47,   //    5 x i32
-      v8i32          =  48,   //    8 x i32
-      v16i32         =  49,   //   16 x i32
-      v32i32         =  50,   //   32 x i32
-      v64i32         =  51,   //   64 x i32
-      v128i32        =  52,   //  128 x i32
-      v256i32        =  53,   //  256 x i32
-      v512i32        =  54,   //  512 x i32
-      v1024i32       =  55,   // 1024 x i32
-      v2048i32       =  56,   // 2048 x i32
-
-      v1i64          =  57,   //  1 x i64
-      v2i64          =  58,   //  2 x i64
-      v4i64          =  59,   //  4 x i64
-      v8i64          =  60,   //  8 x i64
-      v16i64         =  61,   // 16 x i64
-      v32i64         =  62,   // 32 x i64
-
-      v1i128         =  63,   //  1 x i128
+      v1i1           =  15,   //    1 x i1
+      v2i1           =  16,   //    2 x i1
+      v4i1           =  17,   //    4 x i1
+      v8i1           =  18,   //    8 x i1
+      v16i1          =  19,   //   16 x i1
+      v32i1          =  20,   //   32 x i1
+      v64i1          =  21,   //   64 x i1
+      v128i1         =  22,   //  128 x i1
+      v256i1         =  23,   //  256 x i1
+      v512i1         =  24,   //  512 x i1
+      v1024i1        =  25,   // 1024 x i1
+
+      v1i8           =  26,   //  1 x i8
+      v2i8           =  27,   //  2 x i8
+      v4i8           =  28,   //  4 x i8
+      v8i8           =  29,   //  8 x i8
+      v16i8          =  30,   // 16 x i8
+      v32i8          =  31,   // 32 x i8
+      v64i8          =  32,   // 64 x i8
+      v128i8         =  33,   //128 x i8
+      v256i8         =  34,   //256 x i8
+
+      v1i16          =  35,   //  1 x i16
+      v2i16          =  36,   //  2 x i16
+      v3i16          =  37,   //  3 x i16
+      v4i16          =  38,   //  4 x i16
+      v8i16          =  39,   //  8 x i16
+      v16i16         =  40,   // 16 x i16
+      v32i16         =  41,   // 32 x i16
+      v64i16         =  42,   // 64 x i16
+      v128i16        =  43,   //128 x i16
+
+      v1i32          =  44,   //    1 x i32
+      v2i32          =  45,   //    2 x i32
+      v3i32          =  46,   //    3 x i32
+      v4i32          =  47,   //    4 x i32
+      v5i32          =  48,   //    5 x i32
+      v8i32          =  49,   //    8 x i32
+      v16i32         =  50,   //   16 x i32
+      v32i32         =  51,   //   32 x i32
+      v64i32         =  52,   //   64 x i32
+      v128i32        =  53,   //  128 x i32
+      v256i32        =  54,   //  256 x i32
+      v512i32        =  55,   //  512 x i32
+      v1024i32       =  56,   // 1024 x i32
+      v2048i32       =  57,   // 2048 x i32
+
+      v1i64          =  58,   //  1 x i64
+      v2i64          =  59,   //  2 x i64
+      v4i64          =  60,   //  4 x i64
+      v8i64          =  61,   //  8 x i64
+      v16i64         =  62,   // 16 x i64
+      v32i64         =  63,   // 32 x i64
+
+      v1i128         =  64,   //  1 x i128
 
       FIRST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
       LAST_INTEGER_FIXEDLEN_VECTOR_VALUETYPE = v1i128,
 
-      v2f16          =  64,   //    2 x f16
-      v3f16          =  65,   //    3 x f16
-      v4f16          =  66,   //    4 x f16
-      v8f16          =  67,   //    8 x f16
-      v16f16         =  68,   //   16 x f16
-      v32f16         =  69,   //   32 x f16
-      v1f32          =  70,   //    1 x f32
-      v2f32          =  71,   //    2 x f32
-      v3f32          =  72,   //    3 x f32
-      v4f32          =  73,   //    4 x f32
-      v5f32          =  74,   //    5 x f32
-      v8f32          =  75,   //    8 x f32
-      v16f32         =  76,   //   16 x f32
-      v32f32         =  77,   //   32 x f32
-      v64f32         =  78,   //   64 x f32
-      v128f32        =  79,   //  128 x f32
-      v256f32        =  80,   //  256 x f32
-      v512f32        =  81,   //  512 x f32
-      v1024f32       =  82,   // 1024 x f32
-      v2048f32       =  83,   // 2048 x f32
-      v1f64          =  84,   //    1 x f64
-      v2f64          =  85,   //    2 x f64
-      v4f64          =  86,   //    4 x f64
-      v8f64          =  87,   //    8 x f64
-      v16f64         =  88,   //   16 x f64
+      v2f16          =  65,   //    2 x f16
+      v3f16          =  66,   //    3 x f16
+      v4f16          =  67,   //    4 x f16
+      v8f16          =  68,   //    8 x f16
+      v16f16         =  69,   //   16 x f16
+      v32f16         =  70,   //   32 x f16
+      v2bf16         =  71,   //    2 x bf16
+      v3bf16         =  72,   //    3 x bf16
+      v4bf16         =  73,   //    4 x bf16
+      v8bf16         =  74,   //    8 x bf16
+      v16bf16        =  75,   //   16 x bf16
+      v32bf16        =  76,   //   32 x bf16
+      v1f32          =  77,   //    1 x f32
+      v2f32          =  78,   //    2 x f32
+      v3f32          =  79,   //    3 x f32
+      v4f32          =  80,   //    4 x f32
+      v5f32          =  81,   //    5 x f32
+      v8f32          =  82,   //    8 x f32
+      v16f32         =  83,   //   16 x f32
+      v32f32         =  84,   //   32 x f32
+      v64f32         =  85,   //   64 x f32
+      v128f32        =  86,   //  128 x f32
+      v256f32        =  87,   //  256 x f32
+      v512f32        =  88,   //  512 x f32
+      v1024f32       =  89,   // 1024 x f32
+      v2048f32       =  90,   // 2048 x f32
+      v1f64          =  91,   //    1 x f64
+      v2f64          =  92,   //    2 x f64
+      v4f64          =  93,   //    4 x f64
+      v8f64          =  94,   //    8 x f64
+      v16f64         =  95,   //   16 x f64
 
       FIRST_FP_FIXEDLEN_VECTOR_VALUETYPE = v2f16,
       LAST_FP_FIXEDLEN_VECTOR_VALUETYPE = v16f64,
@@ -148,56 +155,59 @@ namespace llvm {
       FIRST_FIXEDLEN_VECTOR_VALUETYPE = v1i1,
       LAST_FIXEDLEN_VECTOR_VALUETYPE = v16f64,
 
-      nxv1i1         =  89,   // n x  1 x i1
-      nxv2i1         =  90,   // n x  2 x i1
-      nxv4i1         =  91,   // n x  4 x i1
-      nxv8i1         =  92,   // n x  8 x i1
-      nxv16i1        =  93,   // n x 16 x i1
-      nxv32i1        =  94,   // n x 32 x i1
-
-      nxv1i8         =  95,   // n x  1 x i8
-      nxv2i8         =  96,   // n x  2 x i8
-      nxv4i8         =  97,   // n x  4 x i8
-      nxv8i8         =  98,   // n x  8 x i8
-      nxv16i8        =  99,   // n x 16 x i8
-      nxv32i8        =  100,  // n x 32 x i8
-
-      nxv1i16        =  101,  // n x  1 x i16
-      nxv2i16        =  102,  // n x  2 x i16
-      nxv4i16        =  103,  // n x  4 x i16
-      nxv8i16        =  104,  // n x  8 x i16
-      nxv16i16       =  105,  // n x 16 x i16
-      nxv32i16       =  106,  // n x 32 x i16
-
-      nxv1i32        =  107,  // n x  1 x i32
-      nxv2i32        =  108,  // n x  2 x i32
-      nxv4i32        =  109,  // n x  4 x i32
-      nxv8i32        =  110,  // n x  8 x i32
-      nxv16i32       =  111,  // n x 16 x i32
-      nxv32i32       =  112,  // n x 32 x i32
-
-      nxv1i64        =  113,  // n x  1 x i64
-      nxv2i64        =  114,  // n x  2 x i64
-      nxv4i64        =  115,  // n x  4 x i64
-      nxv8i64        =  116,  // n x  8 x i64
-      nxv16i64       =  117,  // n x 16 x i64
-      nxv32i64       =  118,  // n x 32 x i64
+      nxv1i1         =  96,   // n x  1 x i1
+      nxv2i1         =  97,   // n x  2 x i1
+      nxv4i1         =  98,   // n x  4 x i1
+      nxv8i1         =  99,   // n x  8 x i1
+      nxv16i1        = 100,   // n x 16 x i1
+      nxv32i1        = 101,   // n x 32 x i1
+
+      nxv1i8         = 102,   // n x  1 x i8
+      nxv2i8         = 103,   // n x  2 x i8
+      nxv4i8         = 104,   // n x  4 x i8
+      nxv8i8         = 105,   // n x  8 x i8
+      nxv16i8        = 106,   // n x 16 x i8
+      nxv32i8        = 107,   // n x 32 x i8
+
+      nxv1i16        = 108,  // n x  1 x i16
+      nxv2i16        = 109,  // n x  2 x i16
+      nxv4i16        = 110,  // n x  4 x i16
+      nxv8i16        = 111,  // n x  8 x i16
+      nxv16i16       = 112,  // n x 16 x i16
+      nxv32i16       = 113,  // n x 32 x i16
+
+      nxv1i32        = 114,  // n x  1 x i32
+      nxv2i32        = 115,  // n x  2 x i32
+      nxv4i32        = 116,  // n x  4 x i32
+      nxv8i32        = 117,  // n x  8 x i32
+      nxv16i32       = 118,  // n x 16 x i32
+      nxv32i32       = 119,  // n x 32 x i32
+
+      nxv1i64        = 120,  // n x  1 x i64
+      nxv2i64        = 121,  // n x  2 x i64
+      nxv4i64        = 122,  // n x  4 x i64
+      nxv8i64        = 123,  // n x  8 x i64
+      nxv16i64       = 124,  // n x 16 x i64
+      nxv32i64       = 125,  // n x 32 x i64
 
       FIRST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv1i1,
       LAST_INTEGER_SCALABLE_VECTOR_VALUETYPE = nxv32i64,
 
-      nxv2f16        =  119,  // n x  2 x f16
-      nxv4f16        =  120,  // n x  4 x f16
-      nxv8f16        =  121,  // n x  8 x f16
-      nxv1f32        =  122,  // n x  1 x f32
-      nxv2f32        =  123,  // n x  2 x f32
-      nxv4f32        =  124,  // n x  4 x f32
-      nxv8f32        =  125,  // n x  8 x f32
-      nxv16f32       =  126,  // n x 16 x f32
-      nxv1f64        =  127,  // n x  1 x f64
-      nxv2f64        =  128,  // n x  2 x f64
-      nxv4f64        =  129,  // n x  4 x f64
-      nxv8f64        =  130,  // n x  8 x f64
+      nxv2f16        = 126,  // n x  2 x f16
+      nxv4f16        = 127,  // n x  4 x f16
+      nxv8f16        = 128,  // n x  8 x f16
+      nxv2bf16       = 129,  // n x  2 x bf16
+      nxv4bf16       = 130,  // n x  4 x bf16
+      nxv8bf16       = 131,  // n x  8 x bf16
+      nxv1f32        = 132,  // n x  1 x f32
+      nxv2f32        = 133,  // n x  2 x f32
+      nxv4f32        = 134,  // n x  4 x f32
+      nxv8f32        = 135,  // n x  8 x f32
+      nxv16f32       = 136,  // n x 16 x f32
+      nxv1f64        = 137,  // n x  1 x f64
+      nxv2f64        = 138,  // n x  2 x f64
+      nxv4f64        = 139,  // n x  4 x f64
+      nxv8f64        = 140,  // n x  8 x f64
 
       FIRST_FP_SCALABLE_VECTOR_VALUETYPE = nxv2f16,
       LAST_FP_SCALABLE_VECTOR_VALUETYPE = nxv8f64,
@@ -208,20 +218,20 @@ namespace llvm {
       FIRST_VECTOR_VALUETYPE = v1i1,
       LAST_VECTOR_VALUETYPE  = nxv8f64,
 
-      x86mmx         =  131,   // This is an X86 MMX value
+      x86mmx         = 141,   // This is an X86 MMX value
 
-      Glue           =  132,   // This glues nodes together during pre-RA sched
+      Glue           = 142,   // This glues nodes together during pre-RA sched
 
-      isVoid         =  133,   // This has no value
+      isVoid         = 143,   // This has no value
 
-      Untyped        =  134,   // This value takes a register, but has
+      Untyped        = 144,   // This value takes a register, but has
                                // unspecified type.  The register class
                                // will be determined by the opcode.
 
-      exnref         =  135,   // WebAssembly's exnref type
+      exnref         = 145,   // WebAssembly's exnref type
 
       FIRST_VALUETYPE = 1,     // This is always the beginning of the list.
-      LAST_VALUETYPE =  136,   // This always remains at the end of the list.
+      LAST_VALUETYPE = 146,   // This always remains at the end of the list.
 
       // This is the current maximum for LAST_VALUETYPE.
       // MVT::MAX_ALLOWED_VALUETYPE is used for asserts and to size bit vectors
@@ -333,17 +343,19 @@ namespace llvm {
 
     /// Return true if this is a 32-bit vector type.
     bool is32BitVector() const {
-      return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8  ||
-              SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32 ||
-              SimpleTy == MVT::v2f16 || SimpleTy == MVT::v1f32);
+      return (SimpleTy == MVT::v32i1 || SimpleTy == MVT::v4i8   ||
+              SimpleTy == MVT::v2i16 || SimpleTy == MVT::v1i32  ||
+              SimpleTy == MVT::v2f16 || SimpleTy == MVT::v2bf16 ||
+              SimpleTy == MVT::v1f32);
     }
 
     /// Return true if this is a 64-bit vector type.
     bool is64BitVector() const {
-      return (SimpleTy == MVT::v64i1 || SimpleTy == MVT::v8i8  ||
-              SimpleTy == MVT::v4i16 || SimpleTy == MVT::v2i32 ||
-              SimpleTy == MVT::v1i64 || SimpleTy == MVT::v4f16 ||
-              SimpleTy == MVT::v2f32 || SimpleTy == MVT::v1f64);
+      return (SimpleTy == MVT::v64i1  || SimpleTy == MVT::v8i8  ||
+              SimpleTy == MVT::v4i16  || SimpleTy == MVT::v2i32 ||
+              SimpleTy == MVT::v1i64  || SimpleTy == MVT::v4f16 ||
+              SimpleTy == MVT::v4bf16 || SimpleTy == MVT::v2f32 ||
+              SimpleTy == MVT::v1f64);
     }
 
     /// Return true if this is a 128-bit vector type.
@@ -351,24 +363,26 @@ namespace llvm {
       return (SimpleTy == MVT::v128i1 || SimpleTy == MVT::v16i8  ||
               SimpleTy == MVT::v8i16  || SimpleTy == MVT::v4i32  ||
               SimpleTy == MVT::v2i64  || SimpleTy == MVT::v1i128 ||
-              SimpleTy == MVT::v8f16  || SimpleTy == MVT::v4f32  ||
-              SimpleTy == MVT::v2f64);
+              SimpleTy == MVT::v8f16  || SimpleTy == MVT::v8bf16 ||
+              SimpleTy == MVT::v4f32  || SimpleTy == MVT::v2f64);
     }
 
     /// Return true if this is a 256-bit vector type.
     bool is256BitVector() const {
-      return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v8f32 ||
-              SimpleTy == MVT::v4f64  || SimpleTy == MVT::v32i8 ||
-              SimpleTy == MVT::v16i16 || SimpleTy == MVT::v8i32 ||
-              SimpleTy == MVT::v4i64  || SimpleTy == MVT::v256i1);
+      return (SimpleTy == MVT::v16f16 || SimpleTy == MVT::v16bf16 ||
+              SimpleTy == MVT::v8f32  || SimpleTy == MVT::v4f64   ||
+              SimpleTy == MVT::v32i8  || SimpleTy == MVT::v16i16  ||
+              SimpleTy == MVT::v8i32  || SimpleTy == MVT::v4i64   ||
+              SimpleTy == MVT::v256i1);
     }
 
     /// Return true if this is a 512-bit vector type.
     bool is512BitVector() const {
-      return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v16f32 ||
-              SimpleTy == MVT::v8f64  || SimpleTy == MVT::v512i1 ||
-              SimpleTy == MVT::v64i8  || SimpleTy == MVT::v32i16 ||
-              SimpleTy == MVT::v16i32 || SimpleTy == MVT::v8i64);
+      return (SimpleTy == MVT::v32f16 || SimpleTy == MVT::v32bf16 ||
+              SimpleTy == MVT::v16f32 || SimpleTy == MVT::v8f64   ||
+              SimpleTy == MVT::v512i1 || SimpleTy == MVT::v64i8   ||
+              SimpleTy == MVT::v32i16 || SimpleTy == MVT::v16i32  ||
+              SimpleTy == MVT::v8i64);
     }
 
     /// Return true if this is a 1024-bit vector type.
@@ -515,6 +529,15 @@ namespace llvm {
       case nxv2f16:
       case nxv4f16:
       case nxv8f16: return f16;
+      case v2bf16:
+      case v3bf16:
+      case v4bf16:
+      case v8bf16:
+      case v16bf16:
+      case v32bf16:
+      case nxv2bf16:
+      case nxv4bf16:
+      case nxv8bf16: return bf16;
       case v1f32:
       case v2f32:
       case v3f32:
@@ -578,6 +601,7 @@ namespace llvm {
       case v32i32:
       case v32i64:
       case v32f16:
+      case v32bf16:
       case v32f32:
       case nxv32i1:
       case nxv32i8:
@@ -590,6 +614,7 @@ namespace llvm {
       case v16i32:
       case v16i64:
       case v16f16:
+      case v16bf16:
       case v16f32:
       case v16f64:
       case nxv16i1:
@@ -604,6 +629,7 @@ namespace llvm {
       case v8i32:
       case v8i64:
       case v8f16:
+      case v8bf16:
       case v8f32:
       case v8f64:
       case nxv8i1:
@@ -612,6 +638,7 @@ namespace llvm {
       case nxv8i32:
       case nxv8i64:
       case nxv8f16:
+      case nxv8bf16:
       case nxv8f32:
       case nxv8f64: return 8;
       case v5i32:
@@ -622,6 +649,7 @@ namespace llvm {
       case v4i32:
       case v4i64:
       case v4f16:
+      case v4bf16:
       case v4f32:
       case v4f64:
       case nxv4i1:
@@ -630,11 +658,13 @@ namespace llvm {
       case nxv4i32:
       case nxv4i64:
       case nxv4f16:
+      case nxv4bf16:
       case nxv4f32:
       case nxv4f64: return 4;
       case v3i16:
       case v3i32:
       case v3f16:
+      case v3bf16:
       case v3f32: return 3;
       case v2i1:
       case v2i8:
@@ -642,6 +672,7 @@ namespace llvm {
       case v2i32:
       case v2i64:
       case v2f16:
+      case v2bf16:
       case v2f32:
       case v2f64:
       case nxv2i1:
@@ -650,6 +681,7 @@ namespace llvm {
       case nxv2i32:
       case nxv2i64:
       case nxv2f16:
+      case nxv2bf16:
       case nxv2f32:
       case nxv2f64: return 2;
       case v1i1:
@@ -712,6 +744,7 @@ namespace llvm {
       case nxv8i1: return TypeSize::Scalable(8);
       case i16 :
       case f16:
+      case bf16:
       case v16i1:
       case v2i8:
       case v1i16: return TypeSize::Fixed(16);
@@ -724,6 +757,7 @@ namespace llvm {
       case v4i8:
       case v2i16:
       case v2f16:
+      case v2bf16:
       case v1f32:
       case v1i32: return TypeSize::Fixed(32);
       case nxv32i1:
@@ -731,9 +765,11 @@ namespace llvm {
       case nxv2i16:
       case nxv1i32:
       case nxv2f16:
+      case nxv2bf16:
       case nxv1f32: return TypeSize::Scalable(32);
       case v3i16:
-      case v3f16: return TypeSize::Fixed(48);
+      case v3f16:
+      case v3bf16: return TypeSize::Fixed(48);
       case x86mmx:
       case f64 :
       case i64 :
@@ -743,6 +779,7 @@ namespace llvm {
       case v2i32:
       case v1i64:
       case v4f16:
+      case v4bf16:
       case v2f32:
       case v1f64: return TypeSize::Fixed(64);
       case nxv8i8:
@@ -750,6 +787,7 @@ namespace llvm {
       case nxv2i32:
       case nxv1i64:
       case nxv4f16:
+      case nxv4bf16:
       case nxv2f32:
       case nxv1f64: return TypeSize::Scalable(64);
       case f80 :  return TypeSize::Fixed(80);
@@ -765,6 +803,7 @@ namespace llvm {
       case v2i64:
       case v1i128:
       case v8f16:
+      case v8bf16:
       case v4f32:
       case v2f64: return TypeSize::Fixed(128);
       case nxv16i8:
@@ -772,6 +811,7 @@ namespace llvm {
       case nxv4i32:
       case nxv2i64:
       case nxv8f16:
+      case nxv8bf16:
       case nxv4f32:
       case nxv2f64: return TypeSize::Scalable(128);
       case v5i32:
@@ -782,6 +822,7 @@ namespace llvm {
       case v8i32:
       case v4i64:
       case v16f16:
+      case v16bf16:
       case v8f32:
       case v4f64: return TypeSize::Fixed(256);
       case nxv32i8:
@@ -796,6 +837,7 @@ namespace llvm {
       case v16i32:
       case v8i64:
       case v32f16:
+      case v32bf16:
       case v16f32:
       case v8f64: return TypeSize::Fixed(512);
       case nxv32i16:
@@ -993,6 +1035,14 @@ namespace llvm {
         if (NumElements == 16) return MVT::v16f16;
         if (NumElements == 32) return MVT::v32f16;
         break;
+      case MVT::bf16:
+        if (NumElements == 2)  return MVT::v2bf16;
+        if (NumElements == 3)  return MVT::v3bf16;
+        if (NumElements == 4)  return MVT::v4bf16;
+        if (NumElements == 8)  return MVT::v8bf16;
+        if (NumElements == 16) return MVT::v16bf16;
+        if (NumElements == 32) return MVT::v32bf16;
+        break;
       case MVT::f32:
         if (NumElements == 1)    return MVT::v1f32;
         if (NumElements == 2)    return MVT::v2f32;
@@ -1069,6 +1119,11 @@ namespace llvm {
           if (NumElements == 4)  return MVT::nxv4f16;
           if (NumElements == 8)  return MVT::nxv8f16;
           break;
+        case MVT::bf16:
+          if (NumElements == 2)  return MVT::nxv2bf16;
+          if (NumElements == 4)  return MVT::nxv4bf16;
+          if (NumElements == 8)  return MVT::nxv8bf16;
+          break;
         case MVT::f32:
           if (NumElements == 1)  return MVT::nxv1f32;
           if (NumElements == 2)  return MVT::nxv2f32;

diff  --git a/llvm/lib/CodeGen/ValueTypes.cpp b/llvm/lib/CodeGen/ValueTypes.cpp
index e24ad844a62c..2b97e9d83dd0 100644
--- a/llvm/lib/CodeGen/ValueTypes.cpp
+++ b/llvm/lib/CodeGen/ValueTypes.cpp
@@ -147,6 +147,7 @@ std::string EVT::getEVTString() const {
     if (isFloatingPoint())
       return "f" + utostr(getSizeInBits());
     llvm_unreachable("Invalid EVT!");
+  case MVT::bf16:    return "bf16";
   case MVT::ppcf128: return "ppcf128";
   case MVT::isVoid:  return "isVoid";
   case MVT::Other:   return "ch";
@@ -174,6 +175,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::i64:     return Type::getInt64Ty(Context);
   case MVT::i128:    return IntegerType::get(Context, 128);
   case MVT::f16:     return Type::getHalfTy(Context);
+  case MVT::bf16:    return Type::getBFloatTy(Context);
   case MVT::f32:     return Type::getFloatTy(Context);
   case MVT::f64:     return Type::getDoubleTy(Context);
   case MVT::f80:     return Type::getX86_FP80Ty(Context);
@@ -236,6 +238,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
   case MVT::v8f16:   return VectorType::get(Type::getHalfTy(Context), 8);
   case MVT::v16f16:  return VectorType::get(Type::getHalfTy(Context), 16);
   case MVT::v32f16:  return VectorType::get(Type::getHalfTy(Context), 32);
+  case MVT::v2bf16:  return VectorType::get(Type::getBFloatTy(Context), 2);
+  case MVT::v3bf16:  return VectorType::get(Type::getBFloatTy(Context), 3);
+  case MVT::v4bf16:  return VectorType::get(Type::getBFloatTy(Context), 4);
+  case MVT::v8bf16:  return VectorType::get(Type::getBFloatTy(Context), 8);
+  case MVT::v16bf16: return VectorType::get(Type::getBFloatTy(Context), 16);
+  case MVT::v32bf16: return VectorType::get(Type::getBFloatTy(Context), 32);
   case MVT::v1f32:   return VectorType::get(Type::getFloatTy(Context), 1);
   case MVT::v2f32:   return VectorType::get(Type::getFloatTy(Context), 2);
   case MVT::v3f32:   return VectorType::get(Type::getFloatTy(Context), 3);
@@ -321,6 +329,12 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const {
     return VectorType::get(Type::getHalfTy(Context), 4, /*Scalable=*/ true);
   case MVT::nxv8f16:
     return VectorType::get(Type::getHalfTy(Context), 8, /*Scalable=*/ true);
+  case MVT::nxv2bf16:
+    return VectorType::get(Type::getBFloatTy(Context), 2, /*Scalable=*/ true);
+  case MVT::nxv4bf16:
+    return VectorType::get(Type::getBFloatTy(Context), 4, /*Scalable=*/ true);
+  case MVT::nxv8bf16:
+    return VectorType::get(Type::getBFloatTy(Context), 8, /*Scalable=*/ true);
   case MVT::nxv1f32:
     return VectorType::get(Type::getFloatTy(Context), 1, /*Scalable=*/ true);
   case MVT::nxv2f32:
@@ -356,6 +370,7 @@ MVT MVT::getVT(Type *Ty, bool HandleUnknown){
   case Type::IntegerTyID:
     return getIntegerVT(cast<IntegerType>(Ty)->getBitWidth());
   case Type::HalfTyID:      return MVT(MVT::f16);
+  case Type::BFloatTyID:    return MVT(MVT::bf16);
   case Type::FloatTyID:     return MVT(MVT::f32);
   case Type::DoubleTyID:    return MVT(MVT::f64);
   case Type::X86_FP80TyID:  return MVT(MVT::f80);

diff  --git a/llvm/utils/TableGen/CodeGenTarget.cpp b/llvm/utils/TableGen/CodeGenTarget.cpp
index e0470e4266f8..282e62cf838e 100644
--- a/llvm/utils/TableGen/CodeGenTarget.cpp
+++ b/llvm/utils/TableGen/CodeGenTarget.cpp
@@ -69,6 +69,7 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::fAny:     return "MVT::fAny";
   case MVT::vAny:     return "MVT::vAny";
   case MVT::f16:      return "MVT::f16";
+  case MVT::bf16:     return "MVT::bf16";
   case MVT::f32:      return "MVT::f32";
   case MVT::f64:      return "MVT::f64";
   case MVT::f80:      return "MVT::f80";
@@ -132,6 +133,12 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::v8f16:    return "MVT::v8f16";
   case MVT::v16f16:   return "MVT::v16f16";
   case MVT::v32f16:   return "MVT::v32f16";
+  case MVT::v2bf16:   return "MVT::v2bf16";
+  case MVT::v3bf16:   return "MVT::v3bf16";
+  case MVT::v4bf16:   return "MVT::v4bf16";
+  case MVT::v8bf16:   return "MVT::v8bf16";
+  case MVT::v16bf16:  return "MVT::v16bf16";
+  case MVT::v32bf16:  return "MVT::v32bf16";
   case MVT::v1f32:    return "MVT::v1f32";
   case MVT::v2f32:    return "MVT::v2f32";
   case MVT::v3f32:    return "MVT::v3f32";
@@ -182,6 +189,9 @@ StringRef llvm::getEnumName(MVT::SimpleValueType T) {
   case MVT::nxv2f16:  return "MVT::nxv2f16";
   case MVT::nxv4f16:  return "MVT::nxv4f16";
   case MVT::nxv8f16:  return "MVT::nxv8f16";
+  case MVT::nxv2bf16: return "MVT::nxv2bf16";
+  case MVT::nxv4bf16: return "MVT::nxv4bf16";
+  case MVT::nxv8bf16: return "MVT::nxv8bf16";
   case MVT::nxv1f32:  return "MVT::nxv1f32";
   case MVT::nxv2f32:  return "MVT::nxv2f32";
   case MVT::nxv4f32:  return "MVT::nxv4f32";