[llvm] [clang] [RISCV] Introduce and use BF16 in Xsfvfwmaccqqq intrinsics (PR #71140)

Sun Nov 5 18:12:27 PST 2023

https://github.com/sunshaoce updated https://github.com/llvm/llvm-project/pull/71140

>From d8d0fcdd00b422e48af733ef638fe9857a05686e Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Fri, 3 Nov 2023 11:50:58 +0800
Subject: [PATCH 1/3] [RISCV][Clang] Add bf16-type vector support for RVV

---
 clang/include/clang/AST/Type.h                |  4 +--
 clang/include/clang/Basic/RISCVVTypes.def     | 35 +++++++++++++++----
 .../clang/Basic/riscv_vector_common.td        |  1 +
 .../clang/Support/RISCVVIntrinsicUtils.h      | 11 +++---
 clang/lib/AST/ASTContext.cpp                  | 12 +++++--
 clang/lib/AST/Type.cpp                        |  7 ++--
 clang/lib/Sema/SemaRISCVVectorLookup.cpp      |  3 ++
 clang/lib/Support/RISCVVIntrinsicUtils.cpp    | 25 +++++++++++++
 .../rvv-intrinsic-datatypes.cpp               | 13 +++++++
 clang/test/Sema/riscv-types.c                 | 19 ++++++++++
 clang/utils/TableGen/RISCVVEmitter.cpp        |  8 +++--
 11 files changed, 117 insertions(+), 21 deletions(-)

diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index f64cd5e0ef64910..f99c4faa7170527 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -7294,7 +7294,7 @@ inline bool Type::isRVVType() const {
 inline bool Type::isRVVType(unsigned ElementCount) const {
   bool Ret = false;
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned,   \
-                        IsFP)                                                  \
+                        IsFP, IsBF)                                            \
   if (NumEls == ElementCount)                                                  \
     Ret |= isSpecificBuiltinType(BuiltinType::Id);
 #include "clang/Basic/RISCVVTypes.def"
@@ -7305,7 +7305,7 @@ inline bool Type::isRVVType(unsigned Bitwidth, bool IsFloat) const {
   bool Ret = false;
 #define RVV_TYPE(Name, Id, SingletonId)
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned,   \
-                        IsFP)                                                  \
+                        IsFP, IsBF)                                            \
   if (ElBits == Bitwidth && IsFloat == IsFP)                                   \
     Ret |= isSpecificBuiltinType(BuiltinType::Id);
 #include "clang/Basic/RISCVVTypes.def"
diff --git a/clang/include/clang/Basic/RISCVVTypes.def b/clang/include/clang/Basic/RISCVVTypes.def
index 575bca58b51e023..af44cdcd53e5bd0 100644
--- a/clang/include/clang/Basic/RISCVVTypes.def
+++ b/clang/include/clang/Basic/RISCVVTypes.def
@@ -12,7 +12,8 @@
 //   A builtin type that has not been covered by any other #define
 //   Defining this macro covers all the builtins.
 //
-// - RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP)
+// - RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, IsSigned, IsFP,
+// IsBF)
 //   A RISC-V V scalable vector.
 //
 // - RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls)
@@ -45,7 +46,8 @@
 #endif
 
 #ifndef RVV_VECTOR_TYPE
-#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, IsFP)\
+#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned,   \
+                        IsFP, IsBF)                                            \
   RVV_TYPE(Name, Id, SingletonId)
 #endif
 
@@ -55,13 +57,20 @@
 #endif
 
 #ifndef RVV_VECTOR_TYPE_INT
-#define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned) \
-  RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, false)
+#define RVV_VECTOR_TYPE_INT(Name, Id, SingletonId, NumEls, ElBits, NF,         \
+                            IsSigned)                                          \
+  RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, false,  \
+                  false)
 #endif
 
 #ifndef RVV_VECTOR_TYPE_FLOAT
-#define RVV_VECTOR_TYPE_FLOAT(Name, Id, SingletonId, NumEls, ElBits, NF) \
-  RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, false, true)
+#define RVV_VECTOR_TYPE_FLOAT(Name, Id, SingletonId, NumEls, ElBits, NF)       \
+  RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, false, true, false)
+#endif
+
+#ifndef RVV_VECTOR_TYPE_BFLOAT
+#define RVV_VECTOR_TYPE_BFLOAT(Name, Id, SingletonId, NumEls, ElBits, NF)      \
+  RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, false, false, true)
 #endif
 
 //===- Vector types -------------------------------------------------------===//
@@ -125,6 +134,19 @@ RVV_VECTOR_TYPE_FLOAT("__rvv_float16m2_t", RvvFloat16m2, RvvFloat16m2Ty, 8,  16,
 RVV_VECTOR_TYPE_FLOAT("__rvv_float16m4_t", RvvFloat16m4, RvvFloat16m4Ty, 16, 16, 1)
 RVV_VECTOR_TYPE_FLOAT("__rvv_float16m8_t", RvvFloat16m8, RvvFloat16m8Ty, 32, 16, 1)
 
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf4_t", RvvBFloat16mf4, RvvBFloat16mf4Ty,
+                       1, 16, 1)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16mf2_t", RvvBFloat16mf2, RvvBFloat16mf2Ty,
+                       2, 16, 1)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m1_t", RvvBFloat16m1, RvvBFloat16m1Ty, 4,
+                       16, 1)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m2_t", RvvBFloat16m2, RvvBFloat16m2Ty, 8,
+                       16, 1)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m4_t", RvvBFloat16m4, RvvBFloat16m4Ty, 16,
+                       16, 1)
+RVV_VECTOR_TYPE_BFLOAT("__rvv_bfloat16m8_t", RvvBFloat16m8, RvvBFloat16m8Ty, 32,
+                       16, 1)
+
 RVV_VECTOR_TYPE_FLOAT("__rvv_float32mf2_t",RvvFloat32mf2,RvvFloat32mf2Ty,1,  32, 1)
 RVV_VECTOR_TYPE_FLOAT("__rvv_float32m1_t", RvvFloat32m1, RvvFloat32m1Ty, 2,  32, 1)
 RVV_VECTOR_TYPE_FLOAT("__rvv_float32m2_t", RvvFloat32m2, RvvFloat32m2Ty, 4,  32, 1)
@@ -430,6 +452,7 @@ RVV_VECTOR_TYPE_FLOAT("__rvv_float64m2x4_t", RvvFloat64m2x4, RvvFloat64m2x4Ty, 2
 
 RVV_VECTOR_TYPE_FLOAT("__rvv_float64m4x2_t", RvvFloat64m4x2, RvvFloat64m4x2Ty, 4, 64, 2)
 
+#undef RVV_VECTOR_TYPE_BFLOAT
 #undef RVV_VECTOR_TYPE_FLOAT
 #undef RVV_VECTOR_TYPE_INT
 #undef RVV_VECTOR_TYPE
diff --git a/clang/include/clang/Basic/riscv_vector_common.td b/clang/include/clang/Basic/riscv_vector_common.td
index 326c3883f0a8409..4036ce8e6903f42 100644
--- a/clang/include/clang/Basic/riscv_vector_common.td
+++ b/clang/include/clang/Basic/riscv_vector_common.td
@@ -41,6 +41,7 @@
 //   x: float16_t (half)
 //   f: float32_t (float)
 //   d: float64_t (double)
+//   b: bfloat16_t (bfloat16)
 //
 // This way, given an LMUL, a record with a TypeRange "sil" will cause the
 // definition of 3 builtins. Each type "t" in the TypeRange (in this example
diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
index 7904658576e5d50..cd620a8fb2b5c14 100644
--- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h
+++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -207,10 +207,11 @@ enum class BasicType : uint8_t {
   Int16 = 1 << 1,
   Int32 = 1 << 2,
   Int64 = 1 << 3,
-  Float16 = 1 << 4,
-  Float32 = 1 << 5,
-  Float64 = 1 << 6,
-  MaxOffset = 6,
+  BFloat16 = 1 << 4,
+  Float16 = 1 << 5,
+  Float32 = 1 << 6,
+  Float64 = 1 << 7,
+  MaxOffset = 7,
   LLVM_MARK_AS_BITMASK_ENUM(Float64),
 };
 
@@ -225,6 +226,7 @@ enum ScalarTypeKind : uint8_t {
   SignedInteger,
   UnsignedInteger,
   Float,
+  BFloat,
   Invalid,
   Undefined,
 };
@@ -300,6 +302,7 @@ class RVVType {
     return isVector() && ElementBitwidth == Width;
   }
   bool isFloat() const { return ScalarType == ScalarTypeKind::Float; }
+  bool isBFloat() const { return ScalarType == ScalarTypeKind::BFloat; }
   bool isSignedInteger() const {
     return ScalarType == ScalarTypeKind::SignedInteger;
   }
diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp
index 1cb81cffd37ea58..a781a7d5a8638cc 100644
--- a/clang/lib/AST/ASTContext.cpp
+++ b/clang/lib/AST/ASTContext.cpp
@@ -2177,7 +2177,7 @@ TypeInfo ASTContext::getTypeInfoImpl(const Type *T) const {
     break;
 #include "clang/Basic/PPCTypes.def"
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, ElKind, ElBits, NF, IsSigned,   \
-                        IsFP)                                                  \
+                        IsFP, IsBF)                                            \
   case BuiltinType::Id:                                                        \
     Width = 0;                                                                 \
     Align = ElBits;                                                            \
@@ -3939,6 +3939,9 @@ ASTContext::getBuiltinVectorTypeInfo(const BuiltinType *Ty) const {
   case BuiltinType::Id:                                                        \
     return {ElBits == 16 ? Float16Ty : (ElBits == 32 ? FloatTy : DoubleTy),    \
             llvm::ElementCount::getScalable(NumEls), NF};
+#define RVV_VECTOR_TYPE_BFLOAT(Name, Id, SingletonId, NumEls, ElBits, NF)      \
+  case BuiltinType::Id:                                                        \
+    return {BFloat16Ty, llvm::ElementCount::getScalable(NumEls), NF};
 #define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls)                      \
   case BuiltinType::Id:                                                        \
     return {BoolTy, llvm::ElementCount::getScalable(NumEls), 1};
@@ -3986,11 +3989,14 @@ QualType ASTContext::getScalableVectorType(QualType EltTy, unsigned NumElts,
   } else if (Target->hasRISCVVTypes()) {
     uint64_t EltTySize = getTypeSize(EltTy);
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned,   \
-                        IsFP)                                                  \
+                        IsFP, IsBF)                                            \
   if (!EltTy->isBooleanType() &&                                               \
       ((EltTy->hasIntegerRepresentation() &&                                   \
         EltTy->hasSignedIntegerRepresentation() == IsSigned) ||                \
-       (EltTy->hasFloatingRepresentation() && IsFP)) &&                        \
+       (EltTy->hasFloatingRepresentation() && !EltTy->isBFloat16Type() &&      \
+        IsFP && !IsBF) ||                                                      \
+       (EltTy->hasFloatingRepresentation() && EltTy->isBFloat16Type() &&       \
+        IsBF && !IsFP)) &&                                                     \
       EltTySize == ElBits && NumElts == NumEls && NumFields == NF)             \
     return SingletonId;
 #define RVV_PREDICATE_TYPE(Name, Id, SingletonId, NumEls)                      \
diff --git a/clang/lib/AST/Type.cpp b/clang/lib/AST/Type.cpp
index d1cbfbd150ba53f..df56544b871e22a 100644
--- a/clang/lib/AST/Type.cpp
+++ b/clang/lib/AST/Type.cpp
@@ -2475,9 +2475,10 @@ QualType Type::getSveEltType(const ASTContext &Ctx) const {
 bool Type::isRVVVLSBuiltinType() const {
   if (const BuiltinType *BT = getAs<BuiltinType>()) {
     switch (BT->getKind()) {
-#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned, IsFP) \
-    case BuiltinType::Id: \
-      return NF == 1;
+#define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned,   \
+                        IsFP, IsBF)                                            \
+  case BuiltinType::Id:                                                        \
+    return NF == 1;
 #include "clang/Basic/RISCVVTypes.def"
     default:
       return false;
diff --git a/clang/lib/Sema/SemaRISCVVectorLookup.cpp b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
index 8e72eba1ac4c56f..9a5aecf669a07df 100644
--- a/clang/lib/Sema/SemaRISCVVectorLookup.cpp
+++ b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
@@ -117,6 +117,9 @@ static QualType RVVType2Qual(ASTContext &Context, const RVVType *Type) {
   case ScalarTypeKind::UnsignedInteger:
     QT = Context.getIntTypeForBitwidth(Type->getElementBitwidth(), false);
     break;
+  case ScalarTypeKind::BFloat:
+    QT = Context.BFloat16Ty;
+    break;
   case ScalarTypeKind::Float:
     switch (Type->getElementBitwidth()) {
     case 64:
diff --git a/clang/lib/Support/RISCVVIntrinsicUtils.cpp b/clang/lib/Support/RISCVVIntrinsicUtils.cpp
index 751d0aedacc9a1f..78d49f15732a11e 100644
--- a/clang/lib/Support/RISCVVIntrinsicUtils.cpp
+++ b/clang/lib/Support/RISCVVIntrinsicUtils.cpp
@@ -101,6 +101,7 @@ RVVType::RVVType(BasicType BT, int Log2LMUL,
 // double    | N/A    | N/A      | N/A     | nxv1f64 | nxv2f64  | nxv4f64  | nxv8f64
 // float     | N/A    | N/A      | nxv1f32 | nxv2f32 | nxv4f32  | nxv8f32  | nxv16f32
 // half      | N/A    | nxv1f16  | nxv2f16 | nxv4f16 | nxv8f16  | nxv16f16 | nxv32f16
+// bfloat16  | N/A    | nxv1bf16 | nxv2bf16| nxv4bf16| nxv8bf16 | nxv16bf16| nxv32bf16
 // clang-format on
 
 bool RVVType::verifyType() const {
@@ -112,6 +113,8 @@ bool RVVType::verifyType() const {
     return false;
   if (isFloat() && ElementBitwidth == 8)
     return false;
+  if (isBFloat() && ElementBitwidth != 16)
+    return false;
   if (IsTuple && (NF == 1 || NF > 8))
     return false;
   if (IsTuple && (1 << std::max(0, LMUL.Log2LMUL)) * NF > 8)
@@ -199,6 +202,9 @@ void RVVType::initBuiltinStr() {
       llvm_unreachable("Unhandled ElementBitwidth!");
     }
     break;
+  case ScalarTypeKind::BFloat:
+    BuiltinStr += "b";
+    break;
   default:
     llvm_unreachable("ScalarType is invalid!");
   }
@@ -234,6 +240,9 @@ void RVVType::initClangBuiltinStr() {
   case ScalarTypeKind::Float:
     ClangBuiltinStr += "float";
     break;
+  case ScalarTypeKind::BFloat:
+    ClangBuiltinStr += "bfloat";
+    break;
   case ScalarTypeKind::SignedInteger:
     ClangBuiltinStr += "int";
     break;
@@ -300,6 +309,15 @@ void RVVType::initTypeStr() {
     } else
       Str += getTypeString("float");
     break;
+  case ScalarTypeKind::BFloat:
+    if (isScalar()) {
+      if (ElementBitwidth == 16)
+        Str += "__bf16";
+      else
+        llvm_unreachable("Unhandled floating type.");
+    } else
+      Str += getTypeString("bfloat");
+    break;
   case ScalarTypeKind::SignedInteger:
     Str += getTypeString("int");
     break;
@@ -322,6 +340,9 @@ void RVVType::initShortStr() {
   case ScalarTypeKind::Float:
     ShortStr = "f" + utostr(ElementBitwidth);
     break;
+  case ScalarTypeKind::BFloat:
+    ShortStr = "bf" + utostr(ElementBitwidth);
+    break;
   case ScalarTypeKind::SignedInteger:
     ShortStr = "i" + utostr(ElementBitwidth);
     break;
@@ -373,6 +394,10 @@ void RVVType::applyBasicType() {
     ElementBitwidth = 64;
     ScalarType = ScalarTypeKind::Float;
     break;
+  case BasicType::BFloat16:
+    ElementBitwidth = 16;
+    ScalarType = ScalarTypeKind::BFloat;
+    break;
   default:
     llvm_unreachable("Unhandled type code!");
   }
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp
index b6f6d0e28c62000..06b80a81ca63391 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp
@@ -64,6 +64,12 @@
 // CHECK-NEXT:    [[F16M2:%.*]] = alloca <vscale x 8 x half>, align 2
 // CHECK-NEXT:    [[F16M4:%.*]] = alloca <vscale x 16 x half>, align 2
 // CHECK-NEXT:    [[F16M8:%.*]] = alloca <vscale x 32 x half>, align 2
+// CHECK-NEXT:    [[BF16MF4:%.*]] = alloca <vscale x 1 x bfloat>, align 2
+// CHECK-NEXT:    [[BF16MF2:%.*]] = alloca <vscale x 2 x bfloat>, align 2
+// CHECK-NEXT:    [[BF16M1:%.*]] = alloca <vscale x 4 x bfloat>, align 2
+// CHECK-NEXT:    [[BF16M2:%.*]] = alloca <vscale x 8 x bfloat>, align 2
+// CHECK-NEXT:    [[BF16M4:%.*]] = alloca <vscale x 16 x bfloat>, align 2
+// CHECK-NEXT:    [[BF16M8:%.*]] = alloca <vscale x 32 x bfloat>, align 2
 // CHECK-NEXT:    [[F32MF2:%.*]] = alloca <vscale x 1 x float>, align 4
 // CHECK-NEXT:    [[F32M1:%.*]] = alloca <vscale x 2 x float>, align 4
 // CHECK-NEXT:    [[F32M2:%.*]] = alloca <vscale x 4 x float>, align 4
@@ -366,6 +372,13 @@ void foo () {
   vfloat16m4_t f16m4;
   vfloat16m8_t f16m8;
 
+  vbfloat16mf4_t bf16mf4;
+  vbfloat16mf2_t bf16mf2;
+  vbfloat16m1_t bf16m1;
+  vbfloat16m2_t bf16m2;
+  vbfloat16m4_t bf16m4;
+  vbfloat16m8_t bf16m8;
+
   vfloat32mf2_t f32mf2;
   vfloat32m1_t f32m1;
   vfloat32m2_t f32m2;
diff --git a/clang/test/Sema/riscv-types.c b/clang/test/Sema/riscv-types.c
index 1372dc990c4d640..cbeafe9d9e99686 100644
--- a/clang/test/Sema/riscv-types.c
+++ b/clang/test/Sema/riscv-types.c
@@ -136,6 +136,25 @@ void bar(void) {
 
   // CHECK: __rvv_int32m1x2_t x44;
   __rvv_int32m1x2_t x44;
+
+  // CHECK: __rvv_bfloat16m1_t x45;
+  __rvv_bfloat16m1_t x45;
+
+  // CHECK: __rvv_bfloat16m2_t x46;
+  __rvv_bfloat16m2_t x46;
+
+  // CHECK: __rvv_bfloat16m4_t x47;
+  __rvv_bfloat16m4_t x47;
+
+  // CHECK: __rvv_bfloat16m8_t x48;
+  __rvv_bfloat16m8_t x48;
+
+  // CHECK: __rvv_bfloat16mf4_t x49;
+  __rvv_bfloat16mf4_t x49;
+
+  // CHECK: __rvv_bfloat16mf2_t x50;
+  __rvv_bfloat16mf2_t x50;
+
 }
 
 typedef __rvv_bool4_t vbool4_t;
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 64eaf91d99a43ba..95b80e07e836e52 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -151,7 +151,9 @@ static BasicType ParseBasicType(char c) {
   case 'd':
     return BasicType::Float64;
     break;
-
+  case 'b':
+    return BasicType::BFloat16;
+    break;
   default:
     return BasicType::Unknown;
   }
@@ -389,8 +391,8 @@ void RVVEmitter::createHeader(raw_ostream &OS) {
     }
   }
 
-  for (BasicType BT :
-       {BasicType::Float16, BasicType::Float32, BasicType::Float64}) {
+  for (BasicType BT : {BasicType::Float16, BasicType::Float32,
+                       BasicType::Float64, BasicType::BFloat16}) {
     for (int Log2LMUL : Log2LMULs) {
       auto T = TypeCache.computeType(BT, Log2LMUL, PrototypeDescriptor::Vector);
       if (T)

>From 272af7bf9c6311708ab2b31d8f5aa604457b2cf2 Mon Sep 17 00:00:00 2001
From: Shao-Ce SUN <sunshaoce at outlook.com>
Date: Fri, 3 Nov 2023 12:31:38 +0800
Subject: [PATCH 2/3] [RISCV] Use BF16 in Xsfvfwmaccqqq intrinsics

BF16 implementation based on @joshua-arch1's https://reviews.llvm.org/D152498
Fixed the incorrect f16 type introduced in https://github.com/llvm/llvm-project/pull/68296
---------
Co-authored-by: un Sha (Joshua) <cooper.joshua at linux.alibaba.com>
---
 .../clang/Basic/riscv_sifive_vector.td        |  4 +--
 .../non-overloaded/sf_vfwmacc_4x4x4.c         | 30 +++++++++----------
 .../non-policy/overloaded/sf_vfwmacc_4x4x4.c  | 30 +++++++++----------
 .../policy/non-overloaded/sf_vfwmacc_4x4x4.c  | 30 +++++++++----------
 .../policy/overloaded/sf_vfwmacc_4x4x4.c      | 30 +++++++++----------
 clang/test/Sema/rvv-required-features.c       |  2 +-
 llvm/lib/Support/RISCVISAInfo.cpp             |  2 +-
 llvm/lib/Target/RISCV/RISCVFeatures.td        |  2 +-
 8 files changed, 65 insertions(+), 65 deletions(-)

diff --git a/clang/include/clang/Basic/riscv_sifive_vector.td b/clang/include/clang/Basic/riscv_sifive_vector.td
index 1e081c734d4941b..d4c22769d9b95ae 100644
--- a/clang/include/clang/Basic/riscv_sifive_vector.td
+++ b/clang/include/clang/Basic/riscv_sifive_vector.td
@@ -109,7 +109,7 @@ multiclass RVVVFWMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
       Name = NAME,
       HasMasked = false,
       Log2LMUL = [-2, -1, 0, 1, 2] in
-    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "x", suffixes_prototypes>;
+    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "b", suffixes_prototypes>;
 }
 
 multiclass RVVVQMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
@@ -146,7 +146,7 @@ let UnMaskedPolicyScheme = HasPolicyOperand in
 
 let UnMaskedPolicyScheme = HasPolicyOperand in
   let RequiredFeatures = ["Xsfvfwmaccqqq"] in
-    defm sf_vfwmacc_4x4x4 : RVVVFWMACCBuiltinSet<[["", "w", "wwSvv"]]>;
+    defm sf_vfwmacc_4x4x4 : RVVVFWMACCBuiltinSet<[["", "Fw", "FwFwSvv"]]>;
 
 let UnMaskedPolicyScheme = HasPassthruOperand, RequiredFeatures = ["Xsfvfnrclipxfqf"] in {
 let ManualCodegen = [{
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
index 185b8f236b62a8d..0a08798ef50371c 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
@@ -7,51 +7,51 @@
 #include <sifive_vector.h>
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2
-// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4bf16.nxv1bf16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 1 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
-vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vbfloat16m1_t vs1, vbfloat16mf4_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32mf2(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
-// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4bf16.nxv2bf16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 2 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
-vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vbfloat16m1_t vs1, vbfloat16mf2_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32m1(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
-// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4bf16.nxv4bf16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 4 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32m2(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
-// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4bf16.nxv8bf16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 8 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
-vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vbfloat16m1_t vs1, vbfloat16m2_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32m4(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
-// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4bf16.nxv16bf16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 16 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
-vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vbfloat16m1_t vs1, vbfloat16m4_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32m8(vd, vs1, vs2, vl);
 }
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
index a07782821327cec..abd3fd5ceccf439 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
@@ -7,51 +7,51 @@
 #include <sifive_vector.h>
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2
-// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4bf16.nxv1bf16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 1 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
-vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vbfloat16m1_t vs1, vbfloat16mf4_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
-// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4bf16.nxv2bf16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 2 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
-vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vbfloat16m1_t vs1, vbfloat16mf2_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
-// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4bf16.nxv4bf16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 4 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
-// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4bf16.nxv8bf16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 8 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
-vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vbfloat16m1_t vs1, vbfloat16m2_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
-// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4bf16.nxv16bf16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 16 x bfloat> [[VS2]], i64 [[VL]], i64 3)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
-vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vbfloat16m1_t vs1, vbfloat16m4_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
 }
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
index e55ecb324c74011..da68eacf3217e63 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
@@ -7,51 +7,51 @@
 #include <sifive_vector.h>
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2_tu
-// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4bf16.nxv1bf16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 1 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
-vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vbfloat16m1_t vs1, vbfloat16mf4_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32mf2_tu(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
-// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4bf16.nxv2bf16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 2 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
-vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vbfloat16m1_t vs1, vbfloat16mf2_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32m1_tu(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
-// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4bf16.nxv4bf16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 4 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32m2_tu(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
-// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4bf16.nxv8bf16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 8 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
-vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vbfloat16m1_t vs1, vbfloat16m2_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32m4_tu(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
-// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4bf16.nxv16bf16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 16 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
-vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vbfloat16m1_t vs1, vbfloat16m4_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_f32m8_tu(vd, vs1, vs2, vl);
 }
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
index a7b26b3de36f90e..6bd52585c7bed91 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
@@ -7,51 +7,51 @@
 #include <sifive_vector.h>
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2_tu
-// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 1 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4bf16.nxv1bf16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 1 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
 //
-vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vbfloat16m1_t vs1, vbfloat16mf4_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
-// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 2 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4bf16.nxv2bf16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 2 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
 //
-vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vbfloat16m1_t vs1, vbfloat16mf2_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
-// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 4 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4bf16.nxv4bf16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 4 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
 //
-vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vbfloat16m1_t vs1, vbfloat16m1_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
-// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 8 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4bf16.nxv8bf16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 8 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
 //
-vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vbfloat16m1_t vs1, vbfloat16m2_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
 }
 
 // CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
-// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x bfloat> [[VS1:%.*]], <vscale x 16 x bfloat> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
 // CHECK-RV64-NEXT:  entry:
-// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4bf16.nxv16bf16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x bfloat> [[VS1]], <vscale x 16 x bfloat> [[VS2]], i64 [[VL]], i64 2)
 // CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
 //
-vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vbfloat16m1_t vs1, vbfloat16m4_t vs2, size_t vl) {
   return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
 }
diff --git a/clang/test/Sema/rvv-required-features.c b/clang/test/Sema/rvv-required-features.c
index ab1582bdb775c51..a88b0429456e50c 100644
--- a/clang/test/Sema/rvv-required-features.c
+++ b/clang/test/Sema/rvv-required-features.c
@@ -29,7 +29,7 @@ void test_xsfvqmaccqoq(vint32m1_t vd, vint8m1_t vs1, vint8m1_t vs2, size_t vl) {
   __riscv_sf_vqmacc_4x8x4(vd, vs1, vs2, vl);
 }
 
-void test_xsfvfwmaccqqq(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+void test_xsfvfwmaccqqq(vfloat32m1_t vd, vbfloat16m1_t vs1, vbfloat16mf2_t vs2, size_t vl) {
   __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
 }
 
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 158ad6fe1d9ca5d..6da5a10e7a866c6 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -996,7 +996,7 @@ static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"};
 static const char *ImpliedExtsXTHeadVdot[] = {"v"};
 static const char *ImpliedExtsXsfvcp[] = {"zve32x"};
 static const char *ImpliedExtsXsfvfnrclipxfqf[] = {"zve32f"};
-static const char *ImpliedExtsXsfvfwmaccqqq[] = {"zve32f"};
+static const char *ImpliedExtsXsfvfwmaccqqq[] = {"zve32f", "zvfbfmin"};
 static const char *ImpliedExtsXsfvqmaccdod[] = {"zve32x"};
 static const char *ImpliedExtsXsfvqmaccqoq[] = {"zve32x"};
 static const char *ImpliedExtsZacas[] = {"a"};
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 1bcf190a583de18..c7967483395d25d 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -839,7 +839,7 @@ def HasVendorXSfvqmaccqoq : Predicate<"Subtarget->hasVendorXSfvqmaccqoq()">,
 def FeatureVendorXSfvfwmaccqqq
     : SubtargetFeature<"xsfvfwmaccqqq", "HasVendorXSfvfwmaccqqq", "true",
                        "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))",
-                       [FeatureStdExtZve32f]>;
+                       [FeatureStdExtZve32f, FeatureStdExtZvfbfmin]>;
 def HasVendorXSfvfwmaccqqq : Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">,
                          AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq),
                          "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))">;

>From 92188bd64f0da1d3f06c1896cda4a8fc89455f30 Mon Sep 17 00:00:00 2001
From: sunshaoce <sunshaoce at gmail.com>
Date: Mon, 6 Nov 2023 10:09:09 +0800
Subject: [PATCH 3/3] fixup! Update `Sema::checkRVVTypeSupport`

---
 clang/include/clang/AST/Type.h                             | 7 ++++---
 clang/lib/Sema/SemaChecking.cpp                            | 7 +++++++
 .../non-policy/non-overloaded/sf_vfwmacc_4x4x4.c           | 2 +-
 .../non-policy/overloaded/sf_vfwmacc_4x4x4.c               | 2 +-
 .../policy/non-overloaded/sf_vfwmacc_4x4x4.c               | 2 +-
 .../policy/overloaded/sf_vfwmacc_4x4x4.c                   | 2 +-
 .../rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp | 2 +-
 clang/test/Sema/riscv-types.c                              | 2 +-
 clang/test/Sema/rvv-required-features.c                    | 2 +-
 9 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/clang/include/clang/AST/Type.h b/clang/include/clang/AST/Type.h
index f99c4faa7170527..d339a93d2f8b5a4 100644
--- a/clang/include/clang/AST/Type.h
+++ b/clang/include/clang/AST/Type.h
@@ -2386,7 +2386,7 @@ class alignas(TypeAlignment) Type : public ExtQualsTypeCommonBase {
 
   bool isRVVType() const;
 
-  bool isRVVType(unsigned Bitwidth, bool IsFloat) const;
+  bool isRVVType(unsigned Bitwidth, bool IsFloat, bool IsBFloat = false) const;
 
   /// Return the implicit lifetime for this type, which must not be dependent.
   Qualifiers::ObjCLifetime getObjCARCImplicitLifetime() const;
@@ -7301,12 +7301,13 @@ inline bool Type::isRVVType(unsigned ElementCount) const {
   return Ret;
 }
 
-inline bool Type::isRVVType(unsigned Bitwidth, bool IsFloat) const {
+inline bool Type::isRVVType(unsigned Bitwidth, bool IsFloat,
+                            bool IsBFloat) const {
   bool Ret = false;
 #define RVV_TYPE(Name, Id, SingletonId)
 #define RVV_VECTOR_TYPE(Name, Id, SingletonId, NumEls, ElBits, NF, IsSigned,   \
                         IsFP, IsBF)                                            \
-  if (ElBits == Bitwidth && IsFloat == IsFP)                                   \
+  if (ElBits == Bitwidth && IsFloat == IsFP && IsBFloat == IsBF)               \
     Ret |= isSpecificBuiltinType(BuiltinType::Id);
 #include "clang/Basic/RISCVVTypes.def"
   return Ret;
diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp
index 12915a32d02ea08..e235fd763e7ee44 100644
--- a/clang/lib/Sema/SemaChecking.cpp
+++ b/clang/lib/Sema/SemaChecking.cpp
@@ -6046,6 +6046,13 @@ void Sema::checkRVVTypeSupport(QualType Ty, SourceLocation Loc, Decl *D) {
       !TI.hasFeature("zvfh") && !TI.hasFeature("zvfhmin"))
     Diag(Loc, diag::err_riscv_type_requires_extension, D)
         << Ty << "zvfh or zvfhmin";
+  // Check if enabled zfbfmin/zvfbfmin for BFloat16
+  if (Ty->isRVVType(/* Bitwidth */ 16, /* IsFloat */ false,
+                    /* IsBFloat */ true) &&
+      !TI.hasFeature("experimental-zfbfmin") &&
+      !TI.hasFeature("experimental-zvfbfmin"))
+    Diag(Loc, diag::err_riscv_type_requires_extension, D)
+        << Ty << "experimental-zfbfmin or experimental-zvfbfmin";
   if (Ty->isRVVType(/* Bitwidth */ 32, /* IsFloat */ true) &&
       !TI.hasFeature("zve32f"))
     Diag(Loc, diag::err_riscv_type_requires_extension, D) << Ty << "zve32f";
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
index 0a08798ef50371c..fc4819dd889cc99 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvfbfmin -target-feature +xsfvfwmaccqqq \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
index abd3fd5ceccf439..90debdbd1a8fe65 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvfbfmin -target-feature +xsfvfwmaccqqq \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
index da68eacf3217e63..74b7be11ec1ab76 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvfbfmin -target-feature +xsfvfwmaccqqq \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
index 6bd52585c7bed91..42e6700184e0d26 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +experimental-zvfbfmin -target-feature +xsfvfwmaccqqq \
 // RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
 // RUN:   FileCheck --check-prefix=CHECK-RV64 %s
 
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp
index 06b80a81ca63391..054d09e7853c393 100644
--- a/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-handcrafted/rvv-intrinsic-datatypes.cpp
@@ -1,6 +1,6 @@
 // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh \
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +experimental-zvfbfmin \
 // RUN: -O0 -emit-llvm %s -o - | FileCheck %s
 
 #include <riscv_vector.h>
diff --git a/clang/test/Sema/riscv-types.c b/clang/test/Sema/riscv-types.c
index cbeafe9d9e99686..ac2b72baf0aab21 100644
--- a/clang/test/Sema/riscv-types.c
+++ b/clang/test/Sema/riscv-types.c
@@ -1,5 +1,5 @@
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v \
-// RUN: -target-feature +zvfh -ast-print %s | FileCheck %s
+// RUN: -target-feature +zvfh -target-feature +experimental-zvfbfmin -ast-print %s | FileCheck %s
 
 void bar(void) {
   // CHECK: __rvv_int64m1_t x0;
diff --git a/clang/test/Sema/rvv-required-features.c b/clang/test/Sema/rvv-required-features.c
index a88b0429456e50c..2714ef04b9bf226 100644
--- a/clang/test/Sema/rvv-required-features.c
+++ b/clang/test/Sema/rvv-required-features.c
@@ -1,7 +1,7 @@
 // REQUIRES: riscv-registered-target
 // RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +xsfvcp \
 // RUN:     -target-feature +xsfvqmaccdod -target-feature +xsfvqmaccqoq \
-// RUN:     -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:     -target-feature +experimental-zvfbfmin -target-feature +xsfvfwmaccqqq \
 // RUN:     -target-feature +xsfvfnrclipxfqf %s -fsyntax-only -verify
 
 // expected-no-diagnostics