[llvm] [RISCV] Support Xsfvfwmaccqqq extensions (PR #68296)

Thu Oct 5 03:01:22 PDT 2023

https://github.com/4vtomat created https://github.com/llvm/llvm-project/pull/68296

Bfloat16 Matrix Multiply Accumulate Instruction
https://sifive.cdn.prismic.io/sifive/c391d53e-ffcf-4091-82f6-c37bf3e883ed_xsfvfwmaccqqq-spec.pdf


>From fc484770303cf50819e09dafd0f4f00760e67e3c Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Wed, 4 Oct 2023 10:23:52 -0700
Subject: [PATCH] [RISCV] Support Xsfvfwmaccqqq extensions

Bfloat16 Matrix Multiply Accumulate Instruction
https://sifive.cdn.prismic.io/sifive/c391d53e-ffcf-4091-82f6-c37bf3e883ed_xsfvfwmaccqqq-spec.pdf
---
 .../clang/Basic/riscv_sifive_vector.td        |  12 ++
 .../clang/Support/RISCVVIntrinsicUtils.h      |  17 +-
 clang/lib/Sema/SemaRISCVVectorLookup.cpp      |   1 +
 .../non-overloaded/sf_vfwmacc_4x4x4.c         |  57 +++++
 .../non-policy/overloaded/sf_vfwmacc_4x4x4.c  |  57 +++++
 .../policy/non-overloaded/sf_vfwmacc_4x4x4.c  |  57 +++++
 .../policy/overloaded/sf_vfwmacc_4x4x4.c      |  57 +++++
 .../test/Sema/rvv-required-features-invalid.c |   4 +
 clang/test/Sema/rvv-required-features.c       |   7 +-
 clang/utils/TableGen/RISCVVEmitter.cpp        |   1 +
 llvm/include/llvm/IR/IntrinsicsRISCVXsf.td    |  11 +
 llvm/lib/Support/RISCVISAInfo.cpp             |   3 +
 .../RISCV/Disassembler/RISCVDisassembler.cpp  |   2 +
 llvm/lib/Target/RISCV/RISCVFeatures.td        |   8 +
 llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td    |  53 +++++
 .../CodeGen/RISCV/rvv/sf_vfwmacc_4x4x4.ll     | 195 ++++++++++++++++++
 llvm/test/MC/RISCV/rvv/xsfvfwmacc.s           |  15 ++
 llvm/unittests/Support/RISCVISAInfoTest.cpp   |   1 +
 18 files changed, 549 insertions(+), 9 deletions(-)
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
 create mode 100644 clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
 create mode 100644 llvm/test/CodeGen/RISCV/rvv/sf_vfwmacc_4x4x4.ll
 create mode 100644 llvm/test/MC/RISCV/rvv/xsfvfwmacc.s

diff --git a/clang/include/clang/Basic/riscv_sifive_vector.td b/clang/include/clang/Basic/riscv_sifive_vector.td
index 6583a7eb7b2e59b..d9be21a2a602944 100644
--- a/clang/include/clang/Basic/riscv_sifive_vector.td
+++ b/clang/include/clang/Basic/riscv_sifive_vector.td
@@ -103,3 +103,15 @@ let SupportOverloading = false in {
     defm sf_vc_v_fvw : RVVVCIXBuiltinSet<["si"],  "UwKzUwUvFe", [-1, 0, 2, 3], UseGPR=0>;
   }
 }
+
+multiclass RVVVFWMACCBuiltinSet<list<list<string>> suffixes_prototypes> {
+  let OverloadedName = NAME,
+      Name = NAME,
+      HasMasked = false,
+      Log2LMUL = [-2, -1, 0, 1, 2] in
+    defm NAME : RVVOutOp1Op2BuiltinSet<NAME, "x", suffixes_prototypes>;
+}
+
+let UnMaskedPolicyScheme = HasPolicyOperand in
+  let RequiredFeatures = ["Xsfvfwmaccqqq"] in
+    defm sf_vfwmacc_4x4x4 : RVVVFWMACCBuiltinSet<[["", "w", "wwSvv"]]>;
diff --git a/clang/include/clang/Support/RISCVVIntrinsicUtils.h b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
index 8ba57d77221dc52..c6ce4884cdbb005 100644
--- a/clang/include/clang/Support/RISCVVIntrinsicUtils.h
+++ b/clang/include/clang/Support/RISCVVIntrinsicUtils.h
@@ -485,14 +485,15 @@ enum RVVRequire : uint16_t {
   RVV_REQ_RV64 = 1 << 0,
   RVV_REQ_ZvfhminOrZvfh = 1 << 1,
   RVV_REQ_Xsfvcp = 1 << 2,
-  RVV_REQ_Zvbb = 1 << 3,
-  RVV_REQ_Zvbc = 1 << 4,
-  RVV_REQ_Zvkb = 1 << 5,
-  RVV_REQ_Zvkg = 1 << 6,
-  RVV_REQ_Zvkned = 1 << 7,
-  RVV_REQ_Zvknha = 1 << 8,
-  RVV_REQ_Zvksed = 1 << 9,
-  RVV_REQ_Zvksh = 1 << 10,
+  RVV_REQ_Xsfvfwmaccqqq = 1 << 3,
+  RVV_REQ_Zvbb = 1 << 4,
+  RVV_REQ_Zvbc = 1 << 5,
+  RVV_REQ_Zvkb = 1 << 6,
+  RVV_REQ_Zvkg = 1 << 7,
+  RVV_REQ_Zvkned = 1 << 8,
+  RVV_REQ_Zvknha = 1 << 9,
+  RVV_REQ_Zvksed = 1 << 10,
+  RVV_REQ_Zvksh = 1 << 11,
 
   LLVM_MARK_AS_BITMASK_ENUM(RVV_REQ_Zvksh)
 };
diff --git a/clang/lib/Sema/SemaRISCVVectorLookup.cpp b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
index ae584dc68719901..97ebeed8ca4375d 100644
--- a/clang/lib/Sema/SemaRISCVVectorLookup.cpp
+++ b/clang/lib/Sema/SemaRISCVVectorLookup.cpp
@@ -205,6 +205,7 @@ void RISCVIntrinsicManagerImpl::ConstructRVVIntrinsics(
   static const std::pair<const char *, RVVRequire> FeatureCheckList[] = {
       {"64bit", RVV_REQ_RV64},
       {"xsfvcp", RVV_REQ_Xsfvcp},
+      {"xsfvfwmaccqqq", RVV_REQ_Xsfvfwmaccqqq},
       {"experimental-zvbb", RVV_REQ_Zvbb},
       {"experimental-zvbc", RVV_REQ_Zvbc},
       {"experimental-zvkb", RVV_REQ_Zvkb},
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
new file mode 100644
index 000000000000000..185b8f236b62a8d
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/non-overloaded/sf_vfwmacc_4x4x4.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32mf2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m1(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m2(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m8(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
new file mode 100644
index 000000000000000..a07782821327cec
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/non-policy/overloaded/sf_vfwmacc_4x4x4.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 3)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
new file mode 100644
index 000000000000000..e55ecb324c74011
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/non-overloaded/sf_vfwmacc_4x4x4.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2_tu
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32mf2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m1_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m2_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_f32m8_tu(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
new file mode 100644
index 000000000000000..a7b26b3de36f90e
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvv-intrinsics-autogenerated/policy/overloaded/sf_vfwmacc_4x4x4.c
@@ -0,0 +1,57 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 2
+// REQUIRES: riscv-registered-target
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +zvfh -target-feature +xsfvfwmaccqqq \
+// RUN:   -disable-O0-optnone -emit-llvm %s -o - | opt -S -passes=mem2reg | \
+// RUN:   FileCheck --check-prefix=CHECK-RV64 %s
+
+#include <sifive_vector.h>
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 1 x float> @test_sf_vfwmacc_4x4x4_f32mf2_tu
+// CHECK-RV64-SAME: (<vscale x 1 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 1 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0:[0-9]+]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16.i64(<vscale x 1 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 1 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 1 x float> [[TMP0]]
+//
+vfloat32mf2_t test_sf_vfwmacc_4x4x4_f32mf2_tu(vfloat32mf2_t vd, vfloat16m1_t vs1, vfloat16mf4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 2 x float> @test_sf_vfwmacc_4x4x4_f32m1
+// CHECK-RV64-SAME: (<vscale x 2 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 2 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16.i64(<vscale x 2 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 2 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 2 x float> [[TMP0]]
+//
+vfloat32m1_t test_sf_vfwmacc_4x4x4_f32m1(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 4 x float> @test_sf_vfwmacc_4x4x4_f32m2
+// CHECK-RV64-SAME: (<vscale x 4 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 4 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16.i64(<vscale x 4 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 4 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 4 x float> [[TMP0]]
+//
+vfloat32m2_t test_sf_vfwmacc_4x4x4_f32m2(vfloat32m2_t vd, vfloat16m1_t vs1, vfloat16m1_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 8 x float> @test_sf_vfwmacc_4x4x4_f32m4
+// CHECK-RV64-SAME: (<vscale x 8 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 8 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16.i64(<vscale x 8 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 8 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 8 x float> [[TMP0]]
+//
+vfloat32m4_t test_sf_vfwmacc_4x4x4_f32m4(vfloat32m4_t vd, vfloat16m1_t vs1, vfloat16m2_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
+
+// CHECK-RV64-LABEL: define dso_local <vscale x 16 x float> @test_sf_vfwmacc_4x4x4_f32m8
+// CHECK-RV64-SAME: (<vscale x 16 x float> [[VD:%.*]], <vscale x 4 x half> [[VS1:%.*]], <vscale x 16 x half> [[VS2:%.*]], i64 noundef [[VL:%.*]]) #[[ATTR0]] {
+// CHECK-RV64-NEXT:  entry:
+// CHECK-RV64-NEXT:    [[TMP0:%.*]] = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16.i64(<vscale x 16 x float> [[VD]], <vscale x 4 x half> [[VS1]], <vscale x 16 x half> [[VS2]], i64 [[VL]], i64 2)
+// CHECK-RV64-NEXT:    ret <vscale x 16 x float> [[TMP0]]
+//
+vfloat32m8_t test_sf_vfwmacc_4x4x4_f32m8(vfloat32m8_t vd, vfloat16m1_t vs1, vfloat16m4_t vs2, size_t vl) {
+  return __riscv_sf_vfwmacc_4x4x4_tu(vd, vs1, vs2, vl);
+}
diff --git a/clang/test/Sema/rvv-required-features-invalid.c b/clang/test/Sema/rvv-required-features-invalid.c
index 0d0d00764a31e39..642c4b5def1b064 100644
--- a/clang/test/Sema/rvv-required-features-invalid.c
+++ b/clang/test/Sema/rvv-required-features-invalid.c
@@ -15,3 +15,7 @@ void test_vsoxei64_v_i8m1(int8_t *base, vuint64m8_t bindex, vint8m1_t value, siz
 void test_xsfvcp_sf_vc_x_se_u64m1(uint64_t rs1, size_t vl) {
   __riscv_sf_vc_x_se_u64m1(1, 1, 1, rs1, vl); // expected-error {{call to undeclared function '__riscv_sf_vc_x_se_u64m1'}}
 }
+
+void test_xsfvfwmaccqqq() {
+  __riscv_sf_vfwmacc_4x4x4(); // expected-error {{call to undeclared function '__riscv_sf_vfwmacc_4x4x4'}}
+}
diff --git a/clang/test/Sema/rvv-required-features.c b/clang/test/Sema/rvv-required-features.c
index c3b7965599e68fb..b4942df660cc11b 100644
--- a/clang/test/Sema/rvv-required-features.c
+++ b/clang/test/Sema/rvv-required-features.c
@@ -1,5 +1,6 @@
 // REQUIRES: riscv-registered-target
-// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +xsfvcp %s -fsyntax-only -verify
+// RUN: %clang_cc1 -triple riscv64 -target-feature +v -target-feature +xsfvcp \
+// RUN:     -target-feature +zvfh -target-feature +xsfvfwmaccqqq %s -fsyntax-only -verify
 
 // expected-no-diagnostics
 
@@ -17,3 +18,7 @@ void test_vsoxei64_v_i8m1(int8_t *base, vuint64m8_t bindex, vint8m1_t value, siz
 void test_sf_vc_x_se_u64m1(uint64_t rs1, size_t vl) {
   __riscv_sf_vc_x_se_u64m1(1, 1, 1, rs1, vl);
 }
+
+void test_xsfvfwmaccqqq(vfloat32m1_t vd, vfloat16m1_t vs1, vfloat16mf2_t vs2, size_t vl) {
+  __riscv_sf_vfwmacc_4x4x4(vd, vs1, vs2, vl);
+}
diff --git a/clang/utils/TableGen/RISCVVEmitter.cpp b/clang/utils/TableGen/RISCVVEmitter.cpp
index 41025926058ed07..1c13dc6a15f08bf 100644
--- a/clang/utils/TableGen/RISCVVEmitter.cpp
+++ b/clang/utils/TableGen/RISCVVEmitter.cpp
@@ -656,6 +656,7 @@ void RVVEmitter::createRVVIntrinsics(
                                   .Case("RV64", RVV_REQ_RV64)
                                   .Case("ZvfhminOrZvfh", RVV_REQ_ZvfhminOrZvfh)
                                   .Case("Xsfvcp", RVV_REQ_Xsfvcp)
+                                  .Case("Xsfvfwmaccqqq", RVV_REQ_Xsfvfwmaccqqq)
                                   .Case("Zvbb", RVV_REQ_Zvbb)
                                   .Case("Zvbc", RVV_REQ_Zvbc)
                                   .Case("Zvkb", RVV_REQ_Zvkb)
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td b/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td
index c8d24ec7d83addf..99824e679f44b08 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXsf.td
@@ -128,8 +128,19 @@ let TargetPrefix = "riscv" in {
     }
   }
 
+  class RISCVSFCustomVMACC
+      : DefaultAttrsIntrinsic< [llvm_anyvector_ty],
+                   [LLVMMatchType<0>, llvm_anyvector_ty, llvm_anyvector_ty,
+                    llvm_anyint_ty, LLVMMatchType<3>],
+                   [ImmArg<ArgIndex<4>>, IntrNoMem] >, RISCVVIntrinsic {
+    let VLOperand = 3;
+  }
+
   defm "" : RISCVSFCustomVC_X<["x", "i"]>;
   defm "" : RISCVSFCustomVC_XV<["x", "i", "v", "f"]>;
   defm "" : RISCVSFCustomVC_XVV<["x", "i", "v", "f"]>;
   defm "" : RISCVSFCustomVC_XVW<["x", "i", "v", "f"]>;
+
+  // XSfvfwmaccqqq
+  def int_riscv_sf_vfwmacc_4x4x4 : RISCVSFCustomVMACC;
 } // TargetPrefix = "riscv"
diff --git a/llvm/lib/Support/RISCVISAInfo.cpp b/llvm/lib/Support/RISCVISAInfo.cpp
index 72d33e1e65c8f58..9e2faf6aff2a68e 100644
--- a/llvm/lib/Support/RISCVISAInfo.cpp
+++ b/llvm/lib/Support/RISCVISAInfo.cpp
@@ -73,6 +73,7 @@ static const RISCVSupportedExtension SupportedExtensions[] = {
     {"xcvsimd", RISCVExtensionVersion{1, 0}},
     {"xsfcie", RISCVExtensionVersion{1, 0}},
     {"xsfvcp", RISCVExtensionVersion{1, 0}},
+    {"xsfvfwmaccqqq", RISCVExtensionVersion{1, 0}},
     {"xtheadba", RISCVExtensionVersion{1, 0}},
     {"xtheadbb", RISCVExtensionVersion{1, 0}},
     {"xtheadbs", RISCVExtensionVersion{1, 0}},
@@ -991,6 +992,7 @@ static const char *ImpliedExtsF[] = {"zicsr"};
 static const char *ImpliedExtsV[] = {"zvl128b", "zve64d"};
 static const char *ImpliedExtsXTHeadVdot[] = {"v"};
 static const char *ImpliedExtsXsfvcp[] = {"zve32x"};
+static const char *ImpliedExtsXsfvfwmaccqqq[] = {"zve32x"};
 static const char *ImpliedExtsZacas[] = {"a"};
 static const char *ImpliedExtsZcb[] = {"zca"};
 static const char *ImpliedExtsZcd[] = {"d", "zca"};
@@ -1058,6 +1060,7 @@ static constexpr ImpliedExtsEntry ImpliedExts[] = {
     {{"f"}, {ImpliedExtsF}},
     {{"v"}, {ImpliedExtsV}},
     {{"xsfvcp"}, {ImpliedExtsXsfvcp}},
+    {{"xsfvfwmaccqqq"}, {ImpliedExtsXsfvfwmaccqqq}},
     {{"xtheadvdot"}, {ImpliedExtsXTHeadVdot}},
     {{"zacas"}, {ImpliedExtsZacas}},
     {{"zcb"}, {ImpliedExtsZcb}},
diff --git a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
index d561d90d3088c1a..33f654b26884935 100644
--- a/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
+++ b/llvm/lib/Target/RISCV/Disassembler/RISCVDisassembler.cpp
@@ -558,6 +558,8 @@ DecodeStatus RISCVDisassembler::getInstruction(MCInst &MI, uint64_t &Size,
                           "XTHeadVdot custom opcode table");
     TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfvcp, DecoderTableXSfvcp32,
                           "SiFive VCIX custom opcode table");
+    TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfvfwmaccqqq, DecoderTableXSfvfwmaccqqq32,
+                          "SiFive Matrix Multiplication Instruction opcode table");
     TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXSfcie, DecoderTableXSfcie32,
                           "Sifive CIE custom opcode table");
     TRY_TO_DECODE_FEATURE(RISCV::FeatureVendorXCVbitmanip,
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 3d3486b7fa89563..10f4c7040173b61 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -813,6 +813,14 @@ def HasVendorXSfcie : Predicate<"Subtarget->hasVendorXSfcie()">,
                         AssemblerPredicate<(all_of FeatureVendorXSfcie),
                         "'XSfcie' (SiFive Custom Instruction Extension SCIE.)">;
 
+def FeatureVendorXSfvfwmaccqqq
+    : SubtargetFeature<"xsfvfwmaccqqq", "HasVendorXSfvfwmaccqqq", "true",
+                       "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))",
+                       [FeatureStdExtZve32x]>;
+def HasVendorXSfvfwmaccqqq : Predicate<"Subtarget->hasVendorXSfvfwmaccqqq()">,
+                         AssemblerPredicate<(all_of FeatureVendorXSfvfwmaccqqq),
+                         "'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))">;
+
 def FeatureVendorXCVbitmanip
     : SubtargetFeature<"xcvbitmanip", "HasVendorXCVbitmanip", "true",
                        "'XCVbitmanip' (CORE-V Bit Manipulation)">;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
index 3975b8426256ac7..3acf534f153090c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td
@@ -178,6 +178,19 @@ multiclass CustomSiFiveVCIX<string suffix, VCIXType type,
                                              InTyRs1, 1>;
 }
 
+let hasSideEffects = 0, mayLoad = 0, mayStore = 0 in {
+class CustomSiFiveVMACC<bits<6> funct6, RISCVVFormat opv, string opcodestr>
+    : RVInstVCCustom2<funct6{5-2}, opv.Value, (outs VR:$rd), (ins VR:$rs1, VR:$rs2),
+                      opcodestr, "$rd, $rs1, $rs2"> {
+  let vm = 1;
+  let funct6_lo2 = funct6{1-0};
+}
+}
+
+multiclass CustomSiFiveVMACC<bits<6> funct6, RISCVVFormat opv, string opcodestr> {
+  def _VV : CustomSiFiveVMACC<funct6, opv, opcodestr>;
+}
+
 let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
     hasSideEffects = 1, hasNoSchedulingInfo = 1, DecoderNamespace = "XSfvcp" in {
   defm X   : CustomSiFiveVCIX<"x",   VCIX_X,   uimm5, uimm5, GPR>,   Sched<[]>;
@@ -196,6 +209,10 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
   defm FVW : CustomSiFiveVCIX<"fvw", VCIX_XVW, VR,    VR,    FPR32>, Sched<[]>;
 }
 
+let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in {
+  defm VFWMACC_4x4x4 : CustomSiFiveVMACC<0b111100, OPFVV, "sf.vfwmacc.4x4x4">;
+}
+
 class VPseudoVC_X<Operand OpClass, DAGOperand RS1Class,
                   bit HasSideEffect = 1> :
       Pseudo<(outs),
@@ -318,6 +335,16 @@ multiclass VPseudoVC_XVW<LMULInfo m, DAGOperand RS1Class,
   }
 }
 
+multiclass VPseudoSiFiveVMACC<string mx, VReg vd_type, VReg vs2_type,
+                              string Constraint = ""> {
+  def "Pseudo" # NAME # "_VV_" # mx : VPseudoTernaryNoMaskWithPolicy<vd_type, V_M1.vrclass, vs2_type, Constraint>;
+}
+
+multiclass VPseudoSiFiveVFWMACC<string Constraint = ""> {
+  foreach m = MxListFW in
+    defm NAME : VPseudoSiFiveVMACC<m.MX, m.wvrclass, m.vrclass, Constraint>;
+}
+
 let Predicates = [HasVendorXSfvcp] in {
   foreach m = MxList in {
     defm X : VPseudoVC_X<m, GPR>;
@@ -346,6 +373,10 @@ let Predicates = [HasVendorXSfvcp] in {
   }
 }
 
+let Predicates = [HasVendorXSfvfwmaccqqq], DecoderNamespace = "XSfvfwmaccqqq" in {
+  defm VFWMACC_4x4x4 : VPseudoSiFiveVFWMACC;
+}
+
 class VPatVC_OP4<string intrinsic_name,
                  string inst,
                  ValueType op2_type,
@@ -476,6 +507,24 @@ class GetFTypeInfo<int Sew> {
                               !eq(Scalar, f64) : "FPR64");
 }
 
+multiclass VPatVMACC<string intrinsic, string instruction,
+                     list<VTypeInfoToWide> info_pairs, ValueType vec_m1> {
+  foreach pair = info_pairs in {
+    defvar VdInfo = pair.Wti;
+    defvar Vs2Info = pair.Vti;
+    let Predicates = [HasVInstructions] in
+    def : VPatTernaryNoMaskWithPolicy<"int_riscv_sf_" # intrinsic,
+                                      "Pseudo" # instruction, "VV", VdInfo.Vector,
+                                      vec_m1, Vs2Info.Vector,
+                                      Vs2Info.Log2SEW, Vs2Info.LMul,
+                                      VdInfo.RegClass, VR, Vs2Info.RegClass>;
+  }
+}
+
+multiclass VPatVFWMACC<string intrinsic, string instruction> {
+  defm : VPatVMACC<intrinsic, instruction, AllWidenableBFloatToFloatVectors, vbfloat16m1_t>;
+}
+
 let Predicates = [HasVendorXSfvcp] in {
   foreach vti = AllVectors in {
     defm : VPatVC_X<"x", "X", vti, XLenVT, GPR>;
@@ -511,6 +560,10 @@ let Predicates = [HasVendorXSfvcp] in {
   }
 }
 
+let Predicates = [HasVendorXSfvfwmaccqqq] in {
+  defm : VPatVFWMACC<"vfwmacc_4x4x4", "VFWMACC_4x4x4">;
+}
+
 let Predicates = [HasVendorXSfcie] in {
 let hasSideEffects = 1, mayLoad = 0, mayStore = 0, DecoderNamespace = "XSfcie" in {
 def SF_CFLUSH_D_L1 : RVInstI<0b000, OPC_SYSTEM, (outs), (ins GPR:$rs1), "cflush.d.l1","$rs1">,
diff --git a/llvm/test/CodeGen/RISCV/rvv/sf_vfwmacc_4x4x4.ll b/llvm/test/CodeGen/RISCV/rvv/sf_vfwmacc_4x4x4.ll
new file mode 100644
index 000000000000000..0ba92c5f70e6eef
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/sf_vfwmacc_4x4x4.ll
@@ -0,0 +1,195 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+experimental-zvfbfmin,+xsfvfwmaccqqq \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+experimental-zvfbfmin,+xsfvfwmaccqqq \
+; RUN:   -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
+
+declare <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16(
+  <vscale x 1 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 1 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 1 x float> @intrinsic_vfwmacc_4x4x4_tu_f32mf2(<vscale x 1 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32mf2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16(
+    <vscale x 1 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 1 x float> %a
+}
+
+define <vscale x 1 x float> @intrinsic_vfwmacc_4x4x4_ta_f32mf2(<vscale x 1 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 1 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32mf2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 1 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv1f32.nxv4f16.nxv1f16(
+    <vscale x 1 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 1 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16(
+  <vscale x 2 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 2 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 2 x float> @intrinsic_vfwmacc_4x4x4_tu_f32m1(<vscale x 2 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16(
+    <vscale x 2 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 2 x float> %a
+}
+
+define <vscale x 2 x float> @intrinsic_vfwmacc_4x4x4_ta_f32m1(<vscale x 2 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 2 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32m1:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v9, v10
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 2 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv2f32.nxv4f16.nxv2f16(
+    <vscale x 2 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 2 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16(
+  <vscale x 4 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 4 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 4 x float> @intrinsic_vfwmacc_4x4x4_tu_f32m2(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32m2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v10, v11
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 4 x float> %a
+}
+
+define <vscale x 4 x float> @intrinsic_vfwmacc_4x4x4_ta_f32m2(<vscale x 4 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32m2:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v10, v11
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 4 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv4f32.nxv4f16.nxv4f16(
+    <vscale x 4 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 4 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16(
+  <vscale x 8 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 8 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 8 x float> @intrinsic_vfwmacc_4x4x4_tu_f32m4(<vscale x 8 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32m4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v12, v14
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16(
+    <vscale x 8 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 8 x float> %a
+}
+
+define <vscale x 8 x float> @intrinsic_vfwmacc_4x4x4_ta_f32m4(<vscale x 8 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 8 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32m4:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v12, v14
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 8 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv8f32.nxv4f16.nxv8f16(
+    <vscale x 8 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 8 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16(
+  <vscale x 16 x float>,
+  <vscale x 4 x bfloat>,
+  <vscale x 16 x bfloat>,
+  iXLen, iXLen);
+
+define <vscale x 16 x float> @intrinsic_vfwmacc_4x4x4_tu_f32m8(<vscale x 16 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_tu_f32m8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, tu, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v16, v20
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16(
+    <vscale x 16 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen %3, iXLen 2)
+
+  ret <vscale x 16 x float> %a
+}
+
+define <vscale x 16 x float> @intrinsic_vfwmacc_4x4x4_ta_f32m8(<vscale x 16 x float> %0, <vscale x 4 x bfloat> %1, <vscale x 16 x bfloat> %2, iXLen %3) nounwind {
+; CHECK-LABEL: intrinsic_vfwmacc_4x4x4_ta_f32m8:
+; CHECK:       # %bb.0: # %entry
+; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
+; CHECK-NEXT:    sf.vfwmacc.4x4x4 v8, v16, v20
+; CHECK-NEXT:    ret
+entry:
+  %a = call <vscale x 16 x float> @llvm.riscv.sf.vfwmacc.4x4x4.nxv16f32.nxv4f16.nxv16f16(
+    <vscale x 16 x float> %0,
+    <vscale x 4 x bfloat> %1,
+    <vscale x 16 x bfloat> %2,
+    iXLen %3, iXLen 3)
+
+  ret <vscale x 16 x float> %a
+}
diff --git a/llvm/test/MC/RISCV/rvv/xsfvfwmacc.s b/llvm/test/MC/RISCV/rvv/xsfvfwmacc.s
new file mode 100644
index 000000000000000..ba054fff2bd87eb
--- /dev/null
+++ b/llvm/test/MC/RISCV/rvv/xsfvfwmacc.s
@@ -0,0 +1,15 @@
+# RUN: llvm-mc -triple=riscv64 -show-encoding --mattr=+v,+xsfvfwmaccqqq %s \
+# RUN:        | FileCheck %s --check-prefixes=CHECK-ENCODING,CHECK-INST
+# RUN: not llvm-mc -triple=riscv64 -show-encoding %s 2>&1 \
+# RUN:        | FileCheck %s --check-prefix=CHECK-ERROR
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v,+xsfvfwmaccqqq %s \
+# RUN:        | llvm-objdump -d --mattr=+v,+xsfvfwmaccqqq - \
+# RUN:        | FileCheck %s --check-prefix=CHECK-INST
+# RUN: llvm-mc -triple=riscv64 -filetype=obj --mattr=+v,+xsfvfwmaccqqq %s \
+# RUN:        | llvm-objdump -d - | FileCheck %s --check-prefix=CHECK-UNKNOWN
+
+sf.vfwmacc.4x4x4 v8, v4, v20
+# CHECK-INST: sf.vfwmacc.4x4x4 v8, v4, v20
+# CHECK-ENCODING: [0x5b,0x14,0x42,0xf3]
+# CHECK-ERROR: instruction requires the following: 'XSfvfwmaccqqq' (SiFive Matrix Multiply Accumulate Instruction and 4-by-4))
+# CHECK-UNKNOWN: 5b 14 42 f3 <unknown>
diff --git a/llvm/unittests/Support/RISCVISAInfoTest.cpp b/llvm/unittests/Support/RISCVISAInfoTest.cpp
index 90e26a23e87c205..cf344b786fb55bd 100644
--- a/llvm/unittests/Support/RISCVISAInfoTest.cpp
+++ b/llvm/unittests/Support/RISCVISAInfoTest.cpp
@@ -713,6 +713,7 @@ R"(All available -march extensions for RISC-V
     xcvsimd             1.0
     xsfcie              1.0
     xsfvcp              1.0
+    xsfvfwmaccqqq       1.0
     xtheadba            1.0
     xtheadbb            1.0
     xtheadbs            1.0