[clang] [llvm] [RISCV] Intrinsic Support for RISC-V P extension (PR #157044)

via cfe-commits cfe-commits at lists.llvm.org
Fri Sep 5 01:14:06 PDT 2025


https://github.com/sihuan created https://github.com/llvm/llvm-project/pull/157044

This draft PR adds initial support for RISC-V P extension intrinsics. It implements the majority of intrinsics that do not use register pairs, providing both the LLVM IR intrinsics and the corresponding C builtins. Please view individual commits for details.

Implementation is based on the intrinsics draft: https://github.com/pz9115/rvp-intrinsic-doc/blob/0345c0af2776f13254304d5d2a7bc19749980d43/source/simd.md
Note: A newer version of the draft has changed some types to vector form, but that version is still incomplete.

According to the discussion on the tech-p-ext mailing list (https://lists.riscv.org/g/tech-p-ext/topic/114558149), the types of these functions may change to vector form; I will follow up promptly.

>From cbd23953c5f3651632861672aec9725eaac23164 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Wed, 27 Aug 2025 18:03:01 +0800
Subject: [PATCH 01/40] [RISCV] Simple example for Packed SIMD LLVM IR
 intrinsic.

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       | 29 +++++++++++++++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      | 17 +++++++++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 36 +++++++++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 36 +++++++++++++++++++
 4 files changed, 118 insertions(+)
 create mode 100644 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
 create mode 100644 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 4b8a9da9688a2..977498e30ac36 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1898,6 +1898,35 @@ let TargetPrefix = "riscv" in {
 let TargetPrefix = "riscv" in
 def int_riscv_pause : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffects]>;
 
+//===----------------------------------------------------------------------===//
+// Packed SIMD
+
+let TargetPrefix = "riscv" in {
+  class RVPBinaryIntrinsics
+      : Intrinsic<[llvm_any_ty],
+                  [LLVMMatchType<0>, LLVMMatchType<0>],
+                  [IntrNoMem]>;
+
+  multiclass RVPBinaryIntrinsics {
+    def "int_riscv_" # NAME   : RVPBinaryIntrinsics;
+  }
+
+  defm sadd : RVPBinaryIntrinsics;
+
+  class RVPBinaryAABIntrinsics
+      : Intrinsic<[llvm_any_ty],
+                  [LLVMMatchType<0>, llvm_any_ty],
+                  [IntrNoMem]>;
+
+  multiclass RVPBinaryAABIntrinsics {
+    def "int_riscv_" # NAME   : RVPBinaryAABIntrinsics;
+  }
+
+  defm pslli_b : RVPBinaryAABIntrinsics;
+  defm pslli_h : RVPBinaryAABIntrinsics;
+  defm pslli_w : RVPBinaryAABIntrinsics;
+} // TargetPrefix = "riscv"
+
 // Vendor extensions
 //===----------------------------------------------------------------------===//
 include "llvm/IR/IntrinsicsRISCVXTHead.td"
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index c342b41e41d01..614a2201eff31 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -889,3 +889,20 @@ let Predicates = [HasStdExtP, IsRV32] in {
     let Inst{23-15} = imm10{9-1};
   }
 }
+
+//===----------------------------------------------------------------------===//
+// Codegen patterns
+//===----------------------------------------------------------------------===//
+
+let Predicates = [HasStdExtP] in {
+def : PatGprImm<int_riscv_pslli_b, PSLLI_B, uimm3>;
+def : PatGprImm<int_riscv_pslli_h, PSLLI_H, uimm4>;
+} // Predicates = [HasStdExtP]
+
+let Predicates = [HasStdExtP, IsRV32] in {
+def : PatGprGpr<int_riscv_sadd, SADD>;
+} // Predicates = [HasStdExtP, IsRV32]
+
+let Predicates = [HasStdExtP, IsRV64] in {
+def : PatGprImm<int_riscv_pslli_w, PSLLI_W, uimm5>;
+} // Predicates = [HasStdExtP, IsRV64]
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
new file mode 100644
index 0000000000000..e0bc6a9f9f2ab
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV32P
+
+declare i32 @llvm.riscv.pslli.b.i32.i32(i32, i32)
+
+define i32 @pslli_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pslli_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pslli.b a0, a0, 1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.pslli.b.i32.i32(i32 %a, i32 1)
+  ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pslli.h.i32.i32(i32, i32)
+
+define i32 @pslli_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pslli_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pslli.h a0, a0, 1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.pslli.h.i32.i32(i32 %a, i32 1)
+  ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.sadd.i32(i32, i32)
+
+define i32 @sadd(i32 %a, i32 %b) {
+; RV32P-LABEL: sadd:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    sadd a0, a0, a1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.sadd.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
new file mode 100644
index 0000000000000..42870c9ce571b
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -0,0 +1,36 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s \
+; RUN:   | FileCheck %s -check-prefix=RV64P
+
+declare i64 @llvm.riscv.pslli.b.i64.i64(i64, i64)
+
+define i64 @pslli.b(i64 %a, i64 %b) {
+; RV64P-LABEL: pslli.b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pslli.b a0, a0, 1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.pslli.b.i64.i64(i64 %a, i64 1)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pslli.h.i64.i64(i64, i64)
+
+define i64 @pslli.h(i64 %a, i64 %b) {
+; RV64P-LABEL: pslli.h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pslli.h a0, a0, 1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.pslli.h.i64.i64(i64 %a, i64 1)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pslli.w.i64.i64(i64, i64)
+
+define i64 @pslli.w(i64 %a, i64 %b) {
+; RV64P-LABEL: pslli.w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pslli.w a0, a0, 1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.pslli.w.i64.i64(i64 %a, i64 1)
+  ret i64 %tmp
+}

>From 109994a822635185c7ae06e12fe30e508734a914 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Wed, 27 Aug 2025 18:03:19 +0800
Subject: [PATCH 02/40] [RISCV] Simple example for Packed SIMD C intrinsic.

---
 clang/include/clang/Basic/BuiltinsRISCV.td    | 14 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 33 +++++++++-
 clang/lib/Headers/CMakeLists.txt              |  1 +
 clang/lib/Headers/riscv_simd.h                | 65 +++++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 33 ++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 36 ++++++++++
 6 files changed, 181 insertions(+), 1 deletion(-)
 create mode 100644 clang/lib/Headers/riscv_simd.h
 create mode 100644 clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
 create mode 100644 clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 5927eaf80d57a..460b0e0806b33 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -137,6 +137,20 @@ def sm3p0 : RISCVBuiltin<"unsigned int(unsigned int)">;
 def sm3p1 : RISCVBuiltin<"unsigned int(unsigned int)">;
 } // Features = "zksh"
 
+//===----------------------------------------------------------------------===//
+// Packed SIMD extension.
+//===----------------------------------------------------------------------===//
+let Features = "experimental-p,32bit" in {
+def pslli_b_32 : RISCVBuiltin<"unsigned int(unsigned int, int32_t)">;
+def pslli_h_32 : RISCVBuiltin<"unsigned int(unsigned int, int32_t)">;
+def sadd : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+} // Features = "experimental-p,32bit"
+
+let Features = "experimental-p,64bit" in {
+def pslli_b_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pslli_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pslli_w : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+} // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
 //===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index b08a0588c5ac1..c2c2b53c7bd67 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -208,7 +208,8 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_brev8_32:
   case RISCV::BI__builtin_riscv_brev8_64:
   case RISCV::BI__builtin_riscv_zip_32:
-  case RISCV::BI__builtin_riscv_unzip_32: {
+  case RISCV::BI__builtin_riscv_unzip_32:
+  case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
     // Zbb
@@ -252,12 +253,42 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_unzip_32:
       ID = Intrinsic::riscv_unzip;
       break;
+    case RISCV::BI__builtin_riscv_sadd:
+      ID = Intrinsic::riscv_sadd;
+      break;
     }
 
     IntrinsicTypes = {ResultType};
     break;
   }
 
+  case RISCV::BI__builtin_riscv_pslli_b_32:
+  case RISCV::BI__builtin_riscv_pslli_b_64:
+  case RISCV::BI__builtin_riscv_pslli_h_32:
+  case RISCV::BI__builtin_riscv_pslli_h_64:
+  case RISCV::BI__builtin_riscv_pslli_w:
+  case RISCV::BI__builtin_riscv_psslai_h_32:
+  case RISCV::BI__builtin_riscv_psslai_h_64:
+  case RISCV::BI__builtin_riscv_psslai_w:
+  case RISCV::BI__builtin_riscv_sslai: {
+    switch (BuiltinID) {
+    default: llvm_unreachable("unexpected builtin ID");
+    case RISCV::BI__builtin_riscv_pslli_b_32:
+    case RISCV::BI__builtin_riscv_pslli_b_64:
+      ID = Intrinsic::riscv_pslli_b;
+      break;
+    case RISCV::BI__builtin_riscv_pslli_h_32:
+    case RISCV::BI__builtin_riscv_pslli_h_64:
+      ID = Intrinsic::riscv_pslli_h;
+      break;
+    case RISCV::BI__builtin_riscv_pslli_w:
+      ID = Intrinsic::riscv_pslli_w;
+      break;
+    }
+    IntrinsicTypes = {ResultType, Ops[1]->getType()};
+    break;
+  }
+
   // Zk builtins
 
   // Zknh
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index dd52498bbef4c..4e06c37da278d 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -129,6 +129,7 @@ set(riscv_files
   riscv_crypto.h
   riscv_nds.h
   riscv_ntlh.h
+  riscv_simd.h
   sifive_vector.h
   andes_vector.h
   )
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
new file mode 100644
index 0000000000000..9ad90d1674f0d
--- /dev/null
+++ b/clang/lib/Headers/riscv_simd.h
@@ -0,0 +1,65 @@
+/*===---- riscv_simd.h - RISC-V 'Packed SIMD' intrinsics --------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+
+#ifndef __RISCV_PACKED_SIMD_H
+#define __RISCV_PACKED_SIMD_H
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#if defined (__riscv_p)
+
+#if __riscv_xlen == 32
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pslli_b(uint32_t __x, int __y) {
+  return __builtin_riscv_pslli_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pslli_h(uint32_t __x, int __y) {
+  return __builtin_riscv_pslli_h_32(__x, __y);
+}
+
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sadd(int32_t __x, int32_t __y) {
+  return __builtin_riscv_sadd(__x, __y);
+}
+#endif
+
+
+#if __riscv_xlen == 64
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pslli_b(uint64_t __x, int __y) {
+  return __builtin_riscv_pslli_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pslli_h(uint64_t __x, int __y) {
+  return __builtin_riscv_pslli_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pslli_w(uint64_t __x, int __y) {
+  return __builtin_riscv_pslli_w(__x, __y);
+}
+#endif
+
+#endif // defined(__riscv_p)
+
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
new file mode 100644
index 0000000000000..ce2a7820cf5ce
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -0,0 +1,33 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-p -emit-llvm %s -o - \
+// RUN:     -disable-O0-optnone | opt -S -passes=mem2reg \
+// RUN:     | FileCheck %s  -check-prefix=RV32P
+
+#include <riscv_simd.h>
+
+// RV32P-LABEL: @pslli_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pslli.b.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pslli_b(uint32_t rs1, int32_t rs2) {
+  return __riscv_pslli_b(rs1, 1);
+}
+
+// RV32P-LABEL: @pslli_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pslli.h.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pslli_h(uint32_t rs1, int32_t rs2) {
+  return __riscv_pslli_h(rs1, 1);
+}
+
+// RV32P-LABEL: @sadd(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t sadd(int32_t rs1, int32_t rs2) {
+  return __riscv_sadd(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
new file mode 100644
index 0000000000000..f321f52cdd2ca
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -0,0 +1,36 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-p -emit-llvm %s -o - \
+// RUN:     -disable-O0-optnone | opt -S -passes=mem2reg \
+// RUN:     | FileCheck %s  -check-prefix=RV64P
+
+#include <riscv_simd.h>
+
+// RV64P-LABEL: @pslli_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pslli.b.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pslli_b(uint64_t rs1, int64_t rs2) {
+  return __riscv_pslli_b(rs1, 1);
+}
+
+// RV64P-LABEL: @pslli_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pslli.h.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pslli_h(uint64_t rs1, int64_t rs2) {
+  return __riscv_pslli_h(rs1, 1);
+}
+
+// RV64P-LABEL: @pslli_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pslli.w.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pslli_w(uint64_t rs1, int64_t rs2) {
+  return __riscv_pslli_w(rs1, 1);
+}

>From d940b1cf09c18b0fe6307267045dbd0c86ddf78e Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 28 Aug 2025 13:40:22 +0800
Subject: [PATCH 03/40] [RISCV] Packed Shift Left Immediate LLVM IR intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  9 +++++---
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      | 13 ++++++-----
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 22 +++++++++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 22 +++++++++++++++++++
 4 files changed, 58 insertions(+), 8 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 977498e30ac36..75364681637dd 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1922,9 +1922,12 @@ let TargetPrefix = "riscv" in {
     def "int_riscv_" # NAME   : RVPBinaryAABIntrinsics;
   }
 
-  defm pslli_b : RVPBinaryAABIntrinsics;
-  defm pslli_h : RVPBinaryAABIntrinsics;
-  defm pslli_w : RVPBinaryAABIntrinsics;
+  defm pslli_b  : RVPBinaryAABIntrinsics;
+  defm pslli_h  : RVPBinaryAABIntrinsics;
+  defm pslli_w  : RVPBinaryAABIntrinsics;
+  defm psslai_h : RVPBinaryAABIntrinsics;
+  defm psslai_w : RVPBinaryAABIntrinsics;
+  defm sslai    : RVPBinaryAABIntrinsics;
 } // TargetPrefix = "riscv"
 
 // Vendor extensions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 614a2201eff31..91468daebe4bf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -895,14 +895,17 @@ let Predicates = [HasStdExtP, IsRV32] in {
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtP] in {
-def : PatGprImm<int_riscv_pslli_b, PSLLI_B, uimm3>;
-def : PatGprImm<int_riscv_pslli_h, PSLLI_H, uimm4>;
+def : PatGprImm<int_riscv_pslli_b,  PSLLI_B,  uimm3>;
+def : PatGprImm<int_riscv_pslli_h,  PSLLI_H,  uimm4>;
+def : PatGprImm<int_riscv_psslai_h, PSSLAI_H, uimm4>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
-def : PatGprGpr<int_riscv_sadd, SADD>;
+def : PatGprImm<int_riscv_sslai, SSLAI, uimm5>;
+def : PatGprGpr<int_riscv_sadd,  SADD>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
-def : PatGprImm<int_riscv_pslli_w, PSLLI_W, uimm5>;
-} // Predicates = [HasStdExtP, IsRV64]
+def : PatGprImm<int_riscv_pslli_w,  PSLLI_W,  uimm5>;
+def : PatGprImm<int_riscv_psslai_w, PSSLAI_W, uimm5>;
+} // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index e0bc6a9f9f2ab..3d9d589f10e87 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -24,6 +24,28 @@ define i32 @pslli_h(i32 %a, i32 %b) {
   ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.psslai.h.i32.i32(i32, i32)
+
+define i32 @psslai_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psslai_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psslai.h a0, a0, 1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.psslai.h.i32.i32(i32 %a, i32 1)
+  ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.sslai.i32.i32(i32, i32)
+
+define i32 @sslai(i32 %a, i32 %b) {
+; RV32P-LABEL: sslai:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    sslai a0, a0, 1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.sslai.i32.i32(i32 %a, i32 1)
+  ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 42870c9ce571b..84d7c400fa059 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -34,3 +34,25 @@ define i64 @pslli.w(i64 %a, i64 %b) {
   %tmp = call i64 @llvm.riscv.pslli.w.i64.i64(i64 %a, i64 1)
   ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.psslai.h.i64.i64(i64, i64)
+
+define i64 @psslai.h(i64 %a, i64 %b) {
+; RV64P-LABEL: psslai.h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psslai.h a0, a0, 1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.psslai.h.i64.i64(i64 %a, i64 1)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psslai.w.i64.i64(i64, i64)
+
+define i64 @psslai.w(i64 %a, i64 %b) {
+; RV64P-LABEL: psslai.w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psslai.w a0, a0, 1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.psslai.w.i64.i64(i64 %a, i64 1)
+  ret i64 %tmp
+}

>From df4d838b4fc643b183d0244818bb2bc950e811eb Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 28 Aug 2025 13:59:42 +0800
Subject: [PATCH 04/40] [RISCV] Packed Shift Left Register LLVM IR intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  6 ++--
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  3 ++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 22 +++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 33 +++++++++++++++++++
 4 files changed, 62 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 75364681637dd..224da9ae39019 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1910,8 +1910,10 @@ let TargetPrefix = "riscv" in {
   multiclass RVPBinaryIntrinsics {
     def "int_riscv_" # NAME   : RVPBinaryIntrinsics;
   }
-
-  defm sadd : RVPBinaryIntrinsics;
+  defm psll_bs : RVPBinaryIntrinsics;
+  defm psll_hs : RVPBinaryIntrinsics;
+  defm psll_ws : RVPBinaryIntrinsics;
+  defm sadd    : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 91468daebe4bf..b72a909114990 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -898,6 +898,8 @@ let Predicates = [HasStdExtP] in {
 def : PatGprImm<int_riscv_pslli_b,  PSLLI_B,  uimm3>;
 def : PatGprImm<int_riscv_pslli_h,  PSLLI_H,  uimm4>;
 def : PatGprImm<int_riscv_psslai_h, PSSLAI_H, uimm4>;
+def : PatGprGpr<int_riscv_psll_bs,  PSLL_BS>;
+def : PatGprGpr<int_riscv_psll_hs,  PSLL_HS>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -908,4 +910,5 @@ def : PatGprGpr<int_riscv_sadd,  SADD>;
 let Predicates = [HasStdExtP, IsRV64] in {
 def : PatGprImm<int_riscv_pslli_w,  PSLLI_W,  uimm5>;
 def : PatGprImm<int_riscv_psslai_w, PSSLAI_W, uimm5>;
+def : PatGprGpr<int_riscv_psll_ws,  PSLL_WS>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 3d9d589f10e87..6487acac5c272 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -46,6 +46,28 @@ define i32 @sslai(i32 %a, i32 %b) {
   ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.psll.bs.i32.i32(i32, i32)
+
+define i32 @psll_bs(i32 %a, i32 %b) {
+; RV32P-LABEL: psll_bs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psll.bs a0, a0, a1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.psll.bs.i32.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psll.hs.i32.i32(i32, i32)
+
+define i32 @psll_hs(i32 %a, i32 %b) {
+; RV32P-LABEL: psll_hs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psll.hs a0, a0, a1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.psll.hs.i32.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 84d7c400fa059..8dad2fe8d4449 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -56,3 +56,36 @@ define i64 @psslai.w(i64 %a, i64 %b) {
   %tmp = call i64 @llvm.riscv.psslai.w.i64.i64(i64 %a, i64 1)
   ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.psll.bs.i64.i64(i64, i64)
+
+define i64 @psll_bs(i64 %a, i64 %b) {
+; RV64P-LABEL: psll_bs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psll.bs a0, a0, a1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.psll.bs.i64.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psll.hs.i64.i64(i64, i64)
+
+define i64 @psll_hs(i64 %a, i64 %b) {
+; RV64P-LABEL: psll_hs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psll.hs a0, a0, a1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.psll.hs.i64.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psll.ws.i64.i64(i64, i64)
+
+define i64 @psll_ws(i64 %a, i64 %b) {
+; RV64P-LABEL: psll_ws:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psll.ws a0, a0, a1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.psll.ws.i64.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}

>From 8f0031a381487bbd5d3fcfa2ca988a6223182572 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 28 Aug 2025 14:05:44 +0800
Subject: [PATCH 05/40] [RISCV] Packed Addition LLVM IR intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  3 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  3 ++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 22 +++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 33 +++++++++++++++++++
 4 files changed, 61 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 224da9ae39019..c42bfef9fc79a 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1913,6 +1913,9 @@ let TargetPrefix = "riscv" in {
   defm psll_bs : RVPBinaryIntrinsics;
   defm psll_hs : RVPBinaryIntrinsics;
   defm psll_ws : RVPBinaryIntrinsics;
+  defm padd_bs : RVPBinaryIntrinsics;
+  defm padd_hs : RVPBinaryIntrinsics;
+  defm padd_ws : RVPBinaryIntrinsics;
   defm sadd    : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index b72a909114990..fc00ba2d88ee4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -900,6 +900,8 @@ def : PatGprImm<int_riscv_pslli_h,  PSLLI_H,  uimm4>;
 def : PatGprImm<int_riscv_psslai_h, PSSLAI_H, uimm4>;
 def : PatGprGpr<int_riscv_psll_bs,  PSLL_BS>;
 def : PatGprGpr<int_riscv_psll_hs,  PSLL_HS>;
+def : PatGprGpr<int_riscv_padd_bs,  PADD_BS>;
+def : PatGprGpr<int_riscv_padd_hs,  PADD_HS>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -911,4 +913,5 @@ let Predicates = [HasStdExtP, IsRV64] in {
 def : PatGprImm<int_riscv_pslli_w,  PSLLI_W,  uimm5>;
 def : PatGprImm<int_riscv_psslai_w, PSSLAI_W, uimm5>;
 def : PatGprGpr<int_riscv_psll_ws,  PSLL_WS>;
+def : PatGprGpr<int_riscv_padd_ws,  PADD_WS>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 6487acac5c272..b68f044bf44ac 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -68,6 +68,28 @@ define i32 @psll_hs(i32 %a, i32 %b) {
   ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.padd.bs.i32.i32(i32, i32)
+
+define i32 @padd_bs(i32 %a, i32 %b) {
+; RV32P-LABEL: padd_bs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    padd.bs a0, a0, a1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.padd.bs.i32.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.padd.hs.i32.i32(i32, i32)
+
+define i32 @padd_hs(i32 %a, i32 %b) {
+; RV32P-LABEL: padd_hs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    padd.hs a0, a0, a1
+; RV32P-NEXT:    ret
+  %tmp = call i32 @llvm.riscv.padd.hs.i32.i32(i32 %a, i32 %b)
+  ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 8dad2fe8d4449..4a11f187a35f9 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -89,3 +89,36 @@ define i64 @psll_ws(i64 %a, i64 %b) {
   %tmp = call i64 @llvm.riscv.psll.ws.i64.i64(i64 %a, i64 %b)
   ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.padd.bs.i64.i64(i64, i64)
+
+define i64 @padd_bs(i64 %a, i64 %b) {
+; RV64P-LABEL: padd_bs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    padd.bs a0, a0, a1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.padd.bs.i64.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.padd.hs.i64.i64(i64, i64)
+
+define i64 @padd_hs(i64 %a, i64 %b) {
+; RV64P-LABEL: padd_hs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    padd.hs a0, a0, a1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.padd.hs.i64.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.padd.ws.i64.i64(i64, i64)
+
+define i64 @padd_ws(i64 %a, i64 %b) {
+; RV64P-LABEL: padd_ws:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    padd.ws a0, a0, a1
+; RV64P-NEXT:    ret
+  %tmp = call i64 @llvm.riscv.padd.ws.i64.i64(i64 %a, i64 %b)
+  ret i64 %tmp
+}

>From 31cc992cb0ac8221b53c8763ab11d4caa2b4e460 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 28 Aug 2025 14:23:00 +0800
Subject: [PATCH 06/40] [RISCV] Packed Shift Left Register C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  9 +++++--
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 16 +++++++++++
 clang/lib/Headers/riscv_simd.h                | 24 +++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 18 +++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 27 +++++++++++++++++++
 5 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 460b0e0806b33..43b42628d1ae0 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -143,13 +143,18 @@ def sm3p1 : RISCVBuiltin<"unsigned int(unsigned int)">;
 let Features = "experimental-p,32bit" in {
 def pslli_b_32 : RISCVBuiltin<"unsigned int(unsigned int, int32_t)">;
 def pslli_h_32 : RISCVBuiltin<"unsigned int(unsigned int, int32_t)">;
-def sadd : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def psll_bs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psll_hs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def sadd       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
 def pslli_b_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def pslli_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def pslli_w : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pslli_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psll_bs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psll_hs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psll_ws    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index c2c2b53c7bd67..c40f0a0672ecc 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -209,6 +209,11 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_brev8_64:
   case RISCV::BI__builtin_riscv_zip_32:
   case RISCV::BI__builtin_riscv_unzip_32:
+  case RISCV::BI__builtin_riscv_psll_bs_32:
+  case RISCV::BI__builtin_riscv_psll_bs_64:
+  case RISCV::BI__builtin_riscv_psll_hs_32:
+  case RISCV::BI__builtin_riscv_psll_hs_64:
+  case RISCV::BI__builtin_riscv_psll_ws:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -253,6 +258,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_unzip_32:
       ID = Intrinsic::riscv_unzip;
       break;
+    case RISCV::BI__builtin_riscv_psll_bs_32:
+    case RISCV::BI__builtin_riscv_psll_bs_64:
+      ID = Intrinsic::riscv_psll_bs;
+      break;
+    case RISCV::BI__builtin_riscv_psll_hs_32:
+    case RISCV::BI__builtin_riscv_psll_hs_64:
+      ID = Intrinsic::riscv_psll_hs;
+      break;
+    case RISCV::BI__builtin_riscv_psll_ws:
+      ID = Intrinsic::riscv_psll_ws;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 9ad90d1674f0d..f25d79956ad0d 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -30,6 +30,15 @@ __riscv_pslli_h(uint32_t __x, int __y) {
   return __builtin_riscv_pslli_h_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_bs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psll_bs_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_hs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psll_hs_32(__x, __y);
+}
 
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
@@ -53,6 +62,21 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pslli_w(uint64_t __x, int __y) {
   return __builtin_riscv_pslli_w(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_bs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psll_bs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_hs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psll_hs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_ws(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psll_ws(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index ce2a7820cf5ce..ce7cccecc4229 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -23,6 +23,24 @@ uint32_t pslli_h(uint32_t rs1, int32_t rs2) {
   return __riscv_pslli_h(rs1, 1);
 }
 
+// RV32P-LABEL: @psll_bs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psll.bs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psll_bs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psll_bs(rs1, rs2);
+}
+
+// RV32P-LABEL: @psll_hs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psll.hs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psll_hs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psll_hs(rs1, rs2);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index f321f52cdd2ca..f5883eb72e059 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -34,3 +34,30 @@ uint64_t pslli_h(uint64_t rs1, int64_t rs2) {
 uint64_t pslli_w(uint64_t rs1, int64_t rs2) {
   return __riscv_pslli_w(rs1, 1);
 }
+
+// RV64P-LABEL: @psll_bs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psll.bs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psll_bs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psll_bs(rs1, rs2);
+}
+
+// RV64P-LABEL: @psll_hs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psll.hs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psll_hs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psll_hs(rs1, rs2);
+}
+
+// RV64P-LABEL: @psll_ws(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psll.ws.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psll_ws(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psll_ws(rs1, rs2);
+}

>From ae51bafbd110252f0e365412a09fa798b50cdb8a Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 28 Aug 2025 14:39:47 +0800
Subject: [PATCH 07/40] [RISCV] Packed Shift Left Immediate C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    | 26 +++++++++++--------
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 10 +++++++
 clang/lib/Headers/riscv_simd.h                | 20 ++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 18 +++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 20 ++++++++++++++
 5 files changed, 83 insertions(+), 11 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 43b42628d1ae0..5d5a1f0054b09 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -141,20 +141,24 @@ def sm3p1 : RISCVBuiltin<"unsigned int(unsigned int)">;
 // Packed SIMD extension.
 //===----------------------------------------------------------------------===//
 let Features = "experimental-p,32bit" in {
-def pslli_b_32 : RISCVBuiltin<"unsigned int(unsigned int, int32_t)">;
-def pslli_h_32 : RISCVBuiltin<"unsigned int(unsigned int, int32_t)">;
-def psll_bs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psll_hs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def sadd       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pslli_b_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def pslli_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psslai_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def sslai       : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psll_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psll_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def sadd        : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
-def pslli_b_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def pslli_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def pslli_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psll_bs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psll_hs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psll_ws    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pslli_b_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pslli_h_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pslli_w     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psslai_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psslai_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psll_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psll_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psll_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index c40f0a0672ecc..28222cd0505a6 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -300,6 +300,16 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_pslli_w:
       ID = Intrinsic::riscv_pslli_w;
       break;
+    case RISCV::BI__builtin_riscv_psslai_h_32:
+    case RISCV::BI__builtin_riscv_psslai_h_64:
+      ID = Intrinsic::riscv_psslai_h;
+      break;
+    case RISCV::BI__builtin_riscv_psslai_w:
+      ID = Intrinsic::riscv_psslai_w;
+      break;
+    case RISCV::BI__builtin_riscv_sslai:
+      ID = Intrinsic::riscv_sslai;
+      break;
     }
     IntrinsicTypes = {ResultType, Ops[1]->getType()};
     break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index f25d79956ad0d..ea10fb0a948d9 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -30,6 +30,16 @@ __riscv_pslli_h(uint32_t __x, int __y) {
   return __builtin_riscv_pslli_h_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psslai_h(uint32_t __x, int __y) {
+  return __builtin_riscv_psslai_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sslai(uint32_t __x, int __y) {
+  return __builtin_riscv_sslai(__x, __y);
+}
+
 static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_psll_bs(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_psll_bs_32(__x, __y);
@@ -63,6 +73,16 @@ __riscv_pslli_w(uint64_t __x, int __y) {
   return __builtin_riscv_pslli_w(__x, __y);
 }
 
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psslai_h(uint64_t __x, int __y) {
+  return __builtin_riscv_psslai_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psslai_w(uint64_t __x, int __y) {
+  return __builtin_riscv_psslai_w(__x, __y);
+}
+
 static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_psll_bs(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_psll_bs_64(__x, __y);
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index ce7cccecc4229..e4850cf456e6b 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -23,6 +23,24 @@ uint32_t pslli_h(uint32_t rs1, int32_t rs2) {
   return __riscv_pslli_h(rs1, 1);
 }
 
+// RV32P-LABEL: @psslai_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psslai.h.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psslai_h(uint32_t rs1, int32_t rs2) {
+  return __riscv_psslai_h(rs1, 1);
+}
+
+// RV32P-LABEL: @sslai(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sslai.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t sslai(uint32_t rs1, int32_t rs2) {
+  return __riscv_sslai(rs1, 1);
+}
+
 // RV32P-LABEL: @psll_bs(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psll.bs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index f5883eb72e059..6f7170f5335dd 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -35,6 +35,26 @@ uint64_t pslli_w(uint64_t rs1, int64_t rs2) {
   return __riscv_pslli_w(rs1, 1);
 }
 
+// RV64P-LABEL: @psslai_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psslai.h.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psslai_h(uint64_t rs1, int64_t rs2) {
+  return __riscv_psslai_h(rs1, 1);
+}
+
+// RV64P-LABEL: @psslai_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psslai.w.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psslai_w(uint64_t rs1, int64_t rs2) {
+  return __riscv_psslai_w(rs1, 1);
+}
+
 // RV64P-LABEL: @psll_bs(
 // RV64P-NEXT:  entry:
 // RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psll.bs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])

>From c9ef190d0c87d7611312a1023d81897fd4c63569 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 28 Aug 2025 15:00:50 +0800
Subject: [PATCH 08/40] [RISCV] Packed Addition C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  5 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 18 +++++++++++++
 clang/lib/Headers/riscv_simd.h                | 25 +++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 18 +++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 27 +++++++++++++++++++
 5 files changed, 93 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 5d5a1f0054b09..ee60709008610 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -147,6 +147,8 @@ def psslai_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def sslai       : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def psll_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def psll_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def padd_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def padd_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def sadd        : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
@@ -159,6 +161,9 @@ def psslai_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def psll_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def psll_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def psll_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 28222cd0505a6..00ede2b2de64e 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -214,6 +214,11 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_psll_hs_32:
   case RISCV::BI__builtin_riscv_psll_hs_64:
   case RISCV::BI__builtin_riscv_psll_ws:
+  case RISCV::BI__builtin_riscv_padd_bs_32:
+  case RISCV::BI__builtin_riscv_padd_bs_64:
+  case RISCV::BI__builtin_riscv_padd_hs_32:
+  case RISCV::BI__builtin_riscv_padd_hs_64:
+  case RISCV::BI__builtin_riscv_padd_ws:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -258,6 +263,8 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_unzip_32:
       ID = Intrinsic::riscv_unzip;
       break;
+
+    // Packed SIMD
     case RISCV::BI__builtin_riscv_psll_bs_32:
     case RISCV::BI__builtin_riscv_psll_bs_64:
       ID = Intrinsic::riscv_psll_bs;
@@ -269,6 +276,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_psll_ws:
       ID = Intrinsic::riscv_psll_ws;
       break;
+    case RISCV::BI__builtin_riscv_padd_bs_32:
+    case RISCV::BI__builtin_riscv_padd_bs_64:
+      ID = Intrinsic::riscv_padd_bs;
+      break;
+    case RISCV::BI__builtin_riscv_padd_hs_32:
+    case RISCV::BI__builtin_riscv_padd_hs_64:
+      ID = Intrinsic::riscv_padd_hs;
+      break;
+    case RISCV::BI__builtin_riscv_padd_ws:
+      ID = Intrinsic::riscv_padd_ws;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index ea10fb0a948d9..f3c6d20853b54 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -50,6 +50,16 @@ __riscv_psll_hs(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_psll_hs_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_bs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_padd_bs_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_hs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_padd_hs_32(__x, __y);
+}
+
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
   return __builtin_riscv_sadd(__x, __y);
@@ -97,6 +107,21 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_psll_ws(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_psll_ws(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_bs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_padd_bs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_hs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_padd_hs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_ws(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_padd_ws(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index e4850cf456e6b..feedca5c72817 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -59,6 +59,24 @@ uint32_t psll_hs(uint32_t rs1, uint32_t rs2) {
   return __riscv_psll_hs(rs1, rs2);
 }
 
+// RV32P-LABEL: @padd_bs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.padd.bs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t padd_bs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_padd_bs(rs1, rs2);
+}
+
+// RV32P-LABEL: @padd_hs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.padd.hs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t padd_hs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_padd_hs(rs1, rs2);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 6f7170f5335dd..4dc1cad0905e3 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -81,3 +81,30 @@ uint64_t psll_hs(uint64_t rs1, uint64_t rs2) {
 uint64_t psll_ws(uint64_t rs1, uint64_t rs2) {
   return __riscv_psll_ws(rs1, rs2);
 }
+
+// RV64P-LABEL: @padd_bs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.padd.bs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t padd_bs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_padd_bs(rs1, rs2);
+}
+
+// RV64P-LABEL: @padd_hs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.padd.hs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t padd_hs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_padd_hs(rs1, rs2);
+}
+
+// RV64P-LABEL: @padd_ws(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.padd.ws.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t padd_ws(uint64_t rs1, uint64_t rs2) {
+  return __riscv_padd_ws(rs1, rs2);
+}

>From 009317ef8dd3fafec668e63dd1b8d209fca412c9 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 29 Aug 2025 15:23:13 +0800
Subject: [PATCH 09/40] [RISCV] Packed Saturating Arithmetic Shift LLVM IR
 intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       | 24 ++++---
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      | 30 +++++----
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 44 +++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 66 +++++++++++++++++++
 4 files changed, 145 insertions(+), 19 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index c42bfef9fc79a..c8b8ae237ed8c 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1910,13 +1910,21 @@ let TargetPrefix = "riscv" in {
   multiclass RVPBinaryIntrinsics {
     def "int_riscv_" # NAME   : RVPBinaryIntrinsics;
   }
-  defm psll_bs : RVPBinaryIntrinsics;
-  defm psll_hs : RVPBinaryIntrinsics;
-  defm psll_ws : RVPBinaryIntrinsics;
-  defm padd_bs : RVPBinaryIntrinsics;
-  defm padd_hs : RVPBinaryIntrinsics;
-  defm padd_ws : RVPBinaryIntrinsics;
-  defm sadd    : RVPBinaryIntrinsics;
+  defm psll_bs   : RVPBinaryIntrinsics;
+  defm psll_hs   : RVPBinaryIntrinsics;
+  defm psll_ws   : RVPBinaryIntrinsics;
+  defm padd_bs   : RVPBinaryIntrinsics;
+  defm padd_hs   : RVPBinaryIntrinsics;
+  defm padd_ws   : RVPBinaryIntrinsics;
+  defm pssha_hs  : RVPBinaryIntrinsics;
+  defm pssha_ws  : RVPBinaryIntrinsics;
+  defm sha       : RVPBinaryIntrinsics;
+  defm ssha      : RVPBinaryIntrinsics;
+  defm psshar_hs : RVPBinaryIntrinsics;
+  defm psshar_ws : RVPBinaryIntrinsics;
+  defm shar      : RVPBinaryIntrinsics;
+  defm sshar     : RVPBinaryIntrinsics;
+  defm sadd      : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
@@ -1927,7 +1935,7 @@ let TargetPrefix = "riscv" in {
     def "int_riscv_" # NAME   : RVPBinaryAABIntrinsics;
   }
 
-  defm pslli_b  : RVPBinaryAABIntrinsics;
+  defm pslli_b  : RVPBinaryAABIntrinsics;
   defm pslli_h  : RVPBinaryAABIntrinsics;
   defm pslli_w  : RVPBinaryAABIntrinsics;
   defm psslai_h : RVPBinaryAABIntrinsics;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index fc00ba2d88ee4..008514f65db94 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -895,23 +895,31 @@ let Predicates = [HasStdExtP, IsRV32] in {
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtP] in {
-def : PatGprImm<int_riscv_pslli_b,  PSLLI_B,  uimm3>;
-def : PatGprImm<int_riscv_pslli_h,  PSLLI_H,  uimm4>;
-def : PatGprImm<int_riscv_psslai_h, PSSLAI_H, uimm4>;
-def : PatGprGpr<int_riscv_psll_bs,  PSLL_BS>;
-def : PatGprGpr<int_riscv_psll_hs,  PSLL_HS>;
-def : PatGprGpr<int_riscv_padd_bs,  PADD_BS>;
-def : PatGprGpr<int_riscv_padd_hs,  PADD_HS>;
+def : PatGprImm<int_riscv_pslli_b,   PSLLI_B,  uimm3>;
+def : PatGprImm<int_riscv_pslli_h,   PSLLI_H,  uimm4>;
+def : PatGprImm<int_riscv_psslai_h,  PSSLAI_H, uimm4>;
+def : PatGprGpr<int_riscv_psll_bs,   PSLL_BS>;
+def : PatGprGpr<int_riscv_psll_hs,   PSLL_HS>;
+def : PatGprGpr<int_riscv_padd_bs,   PADD_BS>;
+def : PatGprGpr<int_riscv_padd_hs,   PADD_HS>;
+def : PatGprGpr<int_riscv_pssha_hs,  PSSHA_HS>;
+def : PatGprGpr<int_riscv_psshar_hs, PSSHAR_HS>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
 def : PatGprImm<int_riscv_sslai, SSLAI, uimm5>;
+def : PatGprGpr<int_riscv_ssha,  SSHA>;
+def : PatGprGpr<int_riscv_sshar, SSHAR>;
 def : PatGprGpr<int_riscv_sadd,  SADD>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
-def : PatGprImm<int_riscv_pslli_w,  PSLLI_W,  uimm5>;
-def : PatGprImm<int_riscv_psslai_w, PSSLAI_W, uimm5>;
-def : PatGprGpr<int_riscv_psll_ws,  PSLL_WS>;
-def : PatGprGpr<int_riscv_padd_ws,  PADD_WS>;
+def : PatGprImm<int_riscv_pslli_w,   PSLLI_W,  uimm5>;
+def : PatGprImm<int_riscv_psslai_w,  PSSLAI_W, uimm5>;
+def : PatGprGpr<int_riscv_psll_ws,   PSLL_WS>;
+def : PatGprGpr<int_riscv_padd_ws,   PADD_WS>;
+def : PatGprGpr<int_riscv_pssha_ws,  PSSHA_WS>;
+def : PatGprGpr<int_riscv_sha,       SHA>;
+def : PatGprGpr<int_riscv_psshar_ws, PSSHAR_WS>;
+def : PatGprGpr<int_riscv_shar,      SHAR>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index b68f044bf44ac..d08b79be46409 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -90,6 +90,50 @@ define i32 @padd_hs(i32 %a, i32 %b) {
   ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.pssha.hs.i32(i32, i32)
+
+define i32 @pssha_hs(i32 %a, i32 %b) {
+; RV32P-LABEL: pssha_hs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pssha.hs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pssha.hs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.ssha.i32(i32, i32)
+
+define i32 @ssha(i32 %a, i32 %b) {
+; RV32P-LABEL: ssha:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    ssha a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.ssha.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psshar.hs.i32(i32, i32)
+
+define i32 @psshar_hs(i32 %a, i32 %b) {
+; RV32P-LABEL: psshar_hs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psshar.hs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psshar.hs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.sshar.i32(i32, i32)
+
+define i32 @sshar(i32 %a, i32 %b) {
+; RV32P-LABEL: sshar:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    sshar a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.sshar.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 4a11f187a35f9..9801b03b36eaa 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -122,3 +122,69 @@ define i64 @padd_ws(i64 %a, i64 %b) {
   %tmp = call i64 @llvm.riscv.padd.ws.i64.i64(i64 %a, i64 %b)
   ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.pssha.hs.i64(i64, i64)
+
+define i64 @pssha_hs(i64 %a, i64 %b) {
+; RV64P-LABEL: pssha_hs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssha.hs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssha.hs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssha.ws.i64(i64, i64)
+
+define i64 @pssha_ws(i64 %a, i64 %b) {
+; RV64P-LABEL: pssha_ws:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssha.ws a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssha.ws.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.sha.i64(i64, i64)
+
+define i64 @sha(i64 %a, i64 %b) {
+; RV64P-LABEL: sha:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    sha a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.sha.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psshar.hs.i64(i64, i64)
+
+define i64 @psshar_hs(i64 %a, i64 %b) {
+; RV64P-LABEL: psshar_hs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psshar.hs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psshar.hs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psshar.ws.i64(i64, i64)
+
+define i64 @psshar_ws(i64 %a, i64 %b) {
+; RV64P-LABEL: psshar_ws:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psshar.ws a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psshar.ws.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.shar.i64(i64, i64)
+
+define i64 @shar(i64 %a, i64 %b) {
+; RV64P-LABEL: shar:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    shar a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.shar.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From cd17f9dbff44b053e8e33f1f9c967f228677c0c9 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 29 Aug 2025 15:38:40 +0800
Subject: [PATCH 10/40] [RISCV] Packed Shift Right Logical Immediate LLVM IR
 intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  3 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  3 ++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 22 +++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 33 +++++++++++++++++++
 4 files changed, 61 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index c8b8ae237ed8c..1f2e477c5dc57 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1941,6 +1941,9 @@ let TargetPrefix = "riscv" in {
   defm psslai_h : RVPBinaryAABIntrinsics;
   defm psslai_w : RVPBinaryAABIntrinsics;
   defm sslai    : RVPBinaryAABIntrinsics;
+  defm psrli_b  : RVPBinaryAABIntrinsics;
+  defm psrli_h  : RVPBinaryAABIntrinsics;
+  defm psrli_w  : RVPBinaryAABIntrinsics;
 } // TargetPrefix = "riscv"
 
 // Vendor extensions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 008514f65db94..0ff961f841d76 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -904,6 +904,8 @@ def : PatGprGpr<int_riscv_padd_bs,   PADD_BS>;
 def : PatGprGpr<int_riscv_padd_hs,   PADD_HS>;
 def : PatGprGpr<int_riscv_pssha_hs,  PSSHA_HS>;
 def : PatGprGpr<int_riscv_psshar_hs, PSSHAR_HS>;
+def : PatGprImm<int_riscv_psrli_b,   PSRLI_B, uimm3>;
+def : PatGprImm<int_riscv_psrli_h,   PSRLI_H, uimm4>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -922,4 +924,5 @@ def : PatGprGpr<int_riscv_pssha_ws,  PSSHA_WS>;
 def : PatGprGpr<int_riscv_sha,       SHA>;
 def : PatGprGpr<int_riscv_psshar_ws, PSSHAR_WS>;
 def : PatGprGpr<int_riscv_shar,      SHAR>;
+def : PatGprImm<int_riscv_psrli_w,   PSRLI_W, uimm5>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index d08b79be46409..42a43a53a7479 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -134,6 +134,28 @@ define i32 @sshar(i32 %a, i32 %b) {
     ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.psrli.b.i32.i32(i32, i32)
+
+define i32 @psrli_b(i32 %a, i32 %b) {
+; RV32P-LABEL: psrli_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psrli.b a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psrli.b.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psrli.h.i32.i32(i32, i32)
+
+define i32 @psrli_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psrli_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psrli.h a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psrli.h.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 9801b03b36eaa..3dcd3413ab74d 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -188,3 +188,36 @@ define i64 @shar(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.shar.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.psrli.b.i64.i64(i64, i64)
+
+define i64 @psrli_b(i64 %a, i64 %b) {
+; RV64P-LABEL: psrli_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrli.b a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrli.b.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psrli.h.i64.i64(i64, i64)
+
+define i64 @psrli_h(i64 %a, i64 %b) {
+; RV64P-LABEL: psrli_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrli.h a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrli.h.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psrli.w.i64.i64(i64, i64)
+
+define i64 @psrli_w(i64 %a, i64 %b) {
+; RV64P-LABEL: psrli_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrli.w a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrli.w.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}

>From 510a060557a30d86c7f3096bbd25f129d8af97a6 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Mon, 1 Sep 2025 15:54:34 +0800
Subject: [PATCH 11/40] [RISCV] Packed Unsigned Saturating Immediate LLVM IR
 intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  3 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      | 10 ++++--
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 22 +++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 33 +++++++++++++++++++
 4 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 1f2e477c5dc57..3875584029ff4 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1944,6 +1944,9 @@ let TargetPrefix = "riscv" in {
   defm psrli_b  : RVPBinaryAABIntrinsics;
   defm psrli_h  : RVPBinaryAABIntrinsics;
   defm psrli_w  : RVPBinaryAABIntrinsics;
+  defm pusati_h : RVPBinaryAABIntrinsics;
+  defm pusati_w : RVPBinaryAABIntrinsics;
+  defm usati    : RVPBinaryAABIntrinsics;
 } // TargetPrefix = "riscv"
 
 // Vendor extensions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 0ff961f841d76..2ccbd9a9ef374 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -904,14 +904,16 @@ def : PatGprGpr<int_riscv_padd_bs,   PADD_BS>;
 def : PatGprGpr<int_riscv_padd_hs,   PADD_HS>;
 def : PatGprGpr<int_riscv_pssha_hs,  PSSHA_HS>;
 def : PatGprGpr<int_riscv_psshar_hs, PSSHAR_HS>;
-def : PatGprImm<int_riscv_psrli_b,   PSRLI_B, uimm3>;
-def : PatGprImm<int_riscv_psrli_h,   PSRLI_H, uimm4>;
+def : PatGprImm<int_riscv_psrli_b,   PSRLI_B,  uimm3>;
+def : PatGprImm<int_riscv_psrli_h,   PSRLI_H,  uimm4>;
+def : PatGprImm<int_riscv_pusati_h,  PUSATI_H, uimm4>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
 def : PatGprImm<int_riscv_sslai, SSLAI, uimm5>;
 def : PatGprGpr<int_riscv_ssha,  SSHA>;
 def : PatGprGpr<int_riscv_sshar, SSHAR>;
+def : PatGprImm<int_riscv_usati, USATI_RV32, uimm5>;
 def : PatGprGpr<int_riscv_sadd,  SADD>;
 } // Predicates = [HasStdExtP, IsRV32]
 
@@ -924,5 +926,7 @@ def : PatGprGpr<int_riscv_pssha_ws,  PSSHA_WS>;
 def : PatGprGpr<int_riscv_sha,       SHA>;
 def : PatGprGpr<int_riscv_psshar_ws, PSSHAR_WS>;
 def : PatGprGpr<int_riscv_shar,      SHAR>;
-def : PatGprImm<int_riscv_psrli_w,   PSRLI_W, uimm5>;
+def : PatGprImm<int_riscv_psrli_w,   PSRLI_W,  uimm5>;
+def : PatGprImm<int_riscv_pusati_w,  PUSATI_W, uimm5>;
+def : PatGprImm<int_riscv_usati,     USATI_RV64, uimm6>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 42a43a53a7479..83c847d5cfa8c 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -156,6 +156,28 @@ define i32 @psrli_h(i32 %a, i32 %b) {
     ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.pusati.h.i32.i32(i32, i32)
+
+define i32 @pusati_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pusati_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pusati.h a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pusati.h.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.usati.i32.i32(i32, i32)
+
+define i32 @usati(i32 %a, i32 %b) {
+; RV32P-LABEL: usati:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    usati a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.usati.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 3dcd3413ab74d..eb74e2462ff6f 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -221,3 +221,36 @@ define i64 @psrli_w(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.psrli.w.i64.i64(i64 %a, i64 1)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.pusati.h.i64.i64(i64, i64)
+
+define i64 @pusati_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pusati_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pusati.h a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pusati.h.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pusati.w.i64.i64(i64, i64)
+
+define i64 @pusati_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pusati_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pusati.w a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pusati.w.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.usati.i64.i64(i64, i64)
+
+define i64 @usati(i64 %a, i64 %b) {
+; RV64P-LABEL: usati:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    usati a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.usati.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}

>From 214d8c2ea84794c017ee69e6c07b5766ee688be4 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Mon, 1 Sep 2025 19:17:11 +0800
Subject: [PATCH 12/40] [RISCV] Add some P-ext LLVM IR intrinsics

Packed Arithmetic Shift Right Immediate intrinsics
Packed Signed Saturating Immediate intrinsics
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  9 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      | 11 +++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 66 +++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 99 +++++++++++++++++++
 4 files changed, 185 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 3875584029ff4..f5aeed17b92a2 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1947,6 +1947,15 @@ let TargetPrefix = "riscv" in {
   defm pusati_h : RVPBinaryAABIntrinsics;
   defm pusati_w : RVPBinaryAABIntrinsics;
   defm usati    : RVPBinaryAABIntrinsics;
+  defm psrai_b  : RVPBinaryAABIntrinsics;
+  defm psrai_h  : RVPBinaryAABIntrinsics;
+  defm psrai_w  : RVPBinaryAABIntrinsics;
+  defm psrari_h : RVPBinaryAABIntrinsics;
+  defm psrari_w : RVPBinaryAABIntrinsics;
+  defm srari    : RVPBinaryAABIntrinsics;
+  defm psati_h  : RVPBinaryAABIntrinsics;
+  defm psati_w  : RVPBinaryAABIntrinsics;
+  defm sati     : RVPBinaryAABIntrinsics;
 } // TargetPrefix = "riscv"
 
 // Vendor extensions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 2ccbd9a9ef374..820bfa87c1492 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -907,6 +907,10 @@ def : PatGprGpr<int_riscv_psshar_hs, PSSHAR_HS>;
 def : PatGprImm<int_riscv_psrli_b,   PSRLI_B,  uimm3>;
 def : PatGprImm<int_riscv_psrli_h,   PSRLI_H,  uimm4>;
 def : PatGprImm<int_riscv_pusati_h,  PUSATI_H, uimm4>;
+def : PatGprImm<int_riscv_psrai_b,   PSRAI_B,  uimm3>;
+def : PatGprImm<int_riscv_psrai_h,   PSRAI_H,  uimm4>;
+def : PatGprImm<int_riscv_psrari_h,  PSRARI_H, uimm4>;
+def : PatGprImm<int_riscv_psati_h,   PSATI_H,  uimm4>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -914,6 +918,8 @@ def : PatGprImm<int_riscv_sslai, SSLAI, uimm5>;
 def : PatGprGpr<int_riscv_ssha,  SSHA>;
 def : PatGprGpr<int_riscv_sshar, SSHAR>;
 def : PatGprImm<int_riscv_usati, USATI_RV32, uimm5>;
+def : PatGprImm<int_riscv_srari, SRARI_RV32, uimm5>;
+def : PatGprImm<int_riscv_sati,  SATI_RV32,  uimm5>;
 def : PatGprGpr<int_riscv_sadd,  SADD>;
 } // Predicates = [HasStdExtP, IsRV32]
 
@@ -929,4 +935,9 @@ def : PatGprGpr<int_riscv_shar,      SHAR>;
 def : PatGprImm<int_riscv_psrli_w,   PSRLI_W,  uimm5>;
 def : PatGprImm<int_riscv_pusati_w,  PUSATI_W, uimm5>;
 def : PatGprImm<int_riscv_usati,     USATI_RV64, uimm6>;
+def : PatGprImm<int_riscv_psrai_w,   PSRAI_W,    uimm5>;
+def : PatGprImm<int_riscv_psrari_w,  PSRARI_W,   uimm5>;
+def : PatGprImm<int_riscv_srari,     SRARI_RV64, uimm6>;
+def : PatGprImm<int_riscv_psati_w,   PSATI_W,    uimm5>;
+def : PatGprImm<int_riscv_sati,      SATI_RV64,  uimm6>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 83c847d5cfa8c..25419db754aef 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -178,6 +178,72 @@ define i32 @usati(i32 %a, i32 %b) {
     ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.psrai.b.i32.i32(i32, i32)
+
+define i32 @psrai_b(i32 %a, i32 %b) {
+; RV32P-LABEL: psrai_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psrai.b a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psrai.b.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psrai.h.i32.i32(i32, i32)
+
+define i32 @psrai_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psrai_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psrai.h a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psrai.h.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psrari.h.i32.i32(i32, i32)
+
+define i32 @psrari_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psrari_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psrari.h a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psrari.h.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.srari.i32.i32(i32, i32)
+
+define i32 @srari_32(i32 %a, i32 %b) {
+; RV32P-LABEL: srari_32:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    srari a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.srari.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psati.h.i32.i32(i32, i32)
+
+define i32 @psati_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psati_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psati.h a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psati.h.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.sati.i32.i32(i32, i32)
+
+define i32 @sati(i32 %a, i32 %b) {
+; RV32P-LABEL: sati:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    sati a0, a0, 1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.sati.i32.i32(i32 %a, i32 1)
+    ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index eb74e2462ff6f..a6ef863a75be4 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -254,3 +254,102 @@ define i64 @usati(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.usati.i64.i64(i64 %a, i64 1)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.psrai.b.i64.i64(i64, i64)
+
+define i64 @psrai_b(i64 %a, i64 %b) {
+; RV64P-LABEL: psrai_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrai.b a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrai.b.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psrai.h.i64.i64(i64, i64)
+
+define i64 @psrai_h(i64 %a, i64 %b) {
+; RV64P-LABEL: psrai_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrai.h a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrai.h.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psrai.w.i64.i64(i64, i64)
+
+define i64 @psrai_w(i64 %a, i64 %b) {
+; RV64P-LABEL: psrai_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrai.w a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrai.w.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psrari.h.i64.i64(i64, i64)
+
+define i64 @psrari_h(i64 %a, i64 %b) {
+; RV64P-LABEL: psrari_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrari.h a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrari.h.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psrari.w.i64.i64(i64, i64)
+
+define i64 @psrari_w(i64 %a, i64 %b) {
+; RV64P-LABEL: psrari_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrari.w a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrari.w.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.srari.i64.i64(i64, i64)
+
+define i64 @srari_64(i64 %a, i64 %b) {
+; RV64P-LABEL: srari_64:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    srari a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.srari.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psati.h.i64.i64(i64, i64)
+
+define i64 @psati_h(i64 %a, i64 %b) {
+; RV64P-LABEL: psati_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psati.h a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psati.h.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psati.w.i64.i64(i64, i64)
+
+define i64 @psati_w(i64 %a, i64 %b) {
+; RV64P-LABEL: psati_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psati.w a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psati.w.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.sati.i64.i64(i64, i64)
+
+define i64 @sati(i64 %a, i64 %b) {
+; RV64P-LABEL: sati:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    sati a0, a0, 1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.sati.i64.i64(i64 %a, i64 1)
+    ret i64 %tmp
+}

>From c6c4d5f7bd98bf63300d98763591c6b995ef6506 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 15:27:51 +0800
Subject: [PATCH 13/40] [RISCV] Add some P-ext LLVM IR intrinsics

Packed Shift Right Logical Register intrinsics
Packed Predicated Summation intrinsics
Packed Arithmetic Shift Right Register intrinsics
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  42 ++++--
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  76 +++++-----
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll |  88 ++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 132 ++++++++++++++++++
 4 files changed, 291 insertions(+), 47 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index f5aeed17b92a2..731b0d4856975 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1910,21 +1910,33 @@ let TargetPrefix = "riscv" in {
   multiclass RVPBinaryIntrinsics {
     def "int_riscv_" # NAME   : RVPBinaryIntrinsics;
   }
-  defm psll_bs   : RVPBinaryIntrinsics;
-  defm psll_hs   : RVPBinaryIntrinsics;
-  defm psll_ws   : RVPBinaryIntrinsics;
-  defm padd_bs   : RVPBinaryIntrinsics;
-  defm padd_hs   : RVPBinaryIntrinsics;
-  defm padd_ws   : RVPBinaryIntrinsics;
-  defm pssha_hs  : RVPBinaryIntrinsics;
-  defm pssha_ws  : RVPBinaryIntrinsics;
-  defm sha       : RVPBinaryIntrinsics;
-  defm ssha      : RVPBinaryIntrinsics;
-  defm psshar_hs : RVPBinaryIntrinsics;
-  defm psshar_ws : RVPBinaryIntrinsics;
-  defm shar      : RVPBinaryIntrinsics;
-  defm sshar     : RVPBinaryIntrinsics;
-  defm sadd      : RVPBinaryIntrinsics;
+  defm psll_bs     : RVPBinaryIntrinsics;
+  defm psll_hs     : RVPBinaryIntrinsics;
+  defm psll_ws     : RVPBinaryIntrinsics;
+  defm padd_bs     : RVPBinaryIntrinsics;
+  defm padd_hs     : RVPBinaryIntrinsics;
+  defm padd_ws     : RVPBinaryIntrinsics;
+  defm pssha_hs    : RVPBinaryIntrinsics;
+  defm pssha_ws    : RVPBinaryIntrinsics;
+  defm sha         : RVPBinaryIntrinsics;
+  defm ssha        : RVPBinaryIntrinsics;
+  defm psshar_hs   : RVPBinaryIntrinsics;
+  defm psshar_ws   : RVPBinaryIntrinsics;
+  defm shar        : RVPBinaryIntrinsics;
+  defm sshar       : RVPBinaryIntrinsics;
+  defm sadd        : RVPBinaryIntrinsics;
+  defm psrl_bs     : RVPBinaryIntrinsics;
+  defm psrl_hs     : RVPBinaryIntrinsics;
+  defm psrl_ws     : RVPBinaryIntrinsics;
+  defm predsum_bs  : RVPBinaryIntrinsics;
+  defm predsum_hs  : RVPBinaryIntrinsics;
+  defm predsum_ws  : RVPBinaryIntrinsics;
+  defm predsumu_bs : RVPBinaryIntrinsics;
+  defm predsumu_hs : RVPBinaryIntrinsics;
+  defm predsumu_ws : RVPBinaryIntrinsics;
+  defm psra_bs     : RVPBinaryIntrinsics;
+  defm psra_hs     : RVPBinaryIntrinsics;
+  defm psra_ws     : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 820bfa87c1492..d4ce20ddaa9b7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -895,22 +895,30 @@ let Predicates = [HasStdExtP, IsRV32] in {
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtP] in {
-def : PatGprImm<int_riscv_pslli_b,   PSLLI_B,  uimm3>;
-def : PatGprImm<int_riscv_pslli_h,   PSLLI_H,  uimm4>;
-def : PatGprImm<int_riscv_psslai_h,  PSSLAI_H, uimm4>;
-def : PatGprGpr<int_riscv_psll_bs,   PSLL_BS>;
-def : PatGprGpr<int_riscv_psll_hs,   PSLL_HS>;
-def : PatGprGpr<int_riscv_padd_bs,   PADD_BS>;
-def : PatGprGpr<int_riscv_padd_hs,   PADD_HS>;
-def : PatGprGpr<int_riscv_pssha_hs,  PSSHA_HS>;
-def : PatGprGpr<int_riscv_psshar_hs, PSSHAR_HS>;
-def : PatGprImm<int_riscv_psrli_b,   PSRLI_B,  uimm3>;
-def : PatGprImm<int_riscv_psrli_h,   PSRLI_H,  uimm4>;
-def : PatGprImm<int_riscv_pusati_h,  PUSATI_H, uimm4>;
-def : PatGprImm<int_riscv_psrai_b,   PSRAI_B,  uimm3>;
-def : PatGprImm<int_riscv_psrai_h,   PSRAI_H,  uimm4>;
-def : PatGprImm<int_riscv_psrari_h,  PSRARI_H, uimm4>;
-def : PatGprImm<int_riscv_psati_h,   PSATI_H,  uimm4>;
+def : PatGprImm<int_riscv_pslli_b,     PSLLI_B,  uimm3>;
+def : PatGprImm<int_riscv_pslli_h,     PSLLI_H,  uimm4>;
+def : PatGprImm<int_riscv_psslai_h,    PSSLAI_H, uimm4>;
+def : PatGprGpr<int_riscv_psll_bs,     PSLL_BS>;
+def : PatGprGpr<int_riscv_psll_hs,     PSLL_HS>;
+def : PatGprGpr<int_riscv_padd_bs,     PADD_BS>;
+def : PatGprGpr<int_riscv_padd_hs,     PADD_HS>;
+def : PatGprGpr<int_riscv_pssha_hs,    PSSHA_HS>;
+def : PatGprGpr<int_riscv_psshar_hs,   PSSHAR_HS>;
+def : PatGprImm<int_riscv_psrli_b,     PSRLI_B,  uimm3>;
+def : PatGprImm<int_riscv_psrli_h,     PSRLI_H,  uimm4>;
+def : PatGprImm<int_riscv_pusati_h,    PUSATI_H, uimm4>;
+def : PatGprImm<int_riscv_psrai_b,     PSRAI_B,  uimm3>;
+def : PatGprImm<int_riscv_psrai_h,     PSRAI_H,  uimm4>;
+def : PatGprImm<int_riscv_psrari_h,    PSRARI_H, uimm4>;
+def : PatGprImm<int_riscv_psati_h,     PSATI_H,  uimm4>;
+def : PatGprGpr<int_riscv_psrl_bs,     PSRL_BS>;
+def : PatGprGpr<int_riscv_psrl_hs,     PSRL_HS>;
+def : PatGprGpr<int_riscv_predsum_bs,  PREDSUM_BS>;
+def : PatGprGpr<int_riscv_predsum_hs,  PREDSUM_HS>;
+def : PatGprGpr<int_riscv_predsumu_bs, PREDSUMU_BS>;
+def : PatGprGpr<int_riscv_predsumu_hs, PREDSUMU_HS>;
+def : PatGprGpr<int_riscv_psra_bs,     PSRA_BS>;
+def : PatGprGpr<int_riscv_psra_hs,     PSRA_HS>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -924,20 +932,24 @@ def : PatGprGpr<int_riscv_sadd,  SADD>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
-def : PatGprImm<int_riscv_pslli_w,   PSLLI_W,  uimm5>;
-def : PatGprImm<int_riscv_psslai_w,  PSSLAI_W, uimm5>;
-def : PatGprGpr<int_riscv_psll_ws,   PSLL_WS>;
-def : PatGprGpr<int_riscv_padd_ws,   PADD_WS>;
-def : PatGprGpr<int_riscv_pssha_ws,  PSSHA_WS>;
-def : PatGprGpr<int_riscv_sha,       SHA>;
-def : PatGprGpr<int_riscv_psshar_ws, PSSHAR_WS>;
-def : PatGprGpr<int_riscv_shar,      SHAR>;
-def : PatGprImm<int_riscv_psrli_w,   PSRLI_W,  uimm5>;
-def : PatGprImm<int_riscv_pusati_w,  PUSATI_W, uimm5>;
-def : PatGprImm<int_riscv_usati,     USATI_RV64, uimm6>;
-def : PatGprImm<int_riscv_psrai_w,   PSRAI_W,    uimm5>;
-def : PatGprImm<int_riscv_psrari_w,  PSRARI_W,   uimm5>;
-def : PatGprImm<int_riscv_srari,     SRARI_RV64, uimm6>;
-def : PatGprImm<int_riscv_psati_w,   PSATI_W,    uimm5>;
-def : PatGprImm<int_riscv_sati,      SATI_RV64,  uimm6>;
+def : PatGprImm<int_riscv_pslli_w,     PSLLI_W,  uimm5>;
+def : PatGprImm<int_riscv_psslai_w,    PSSLAI_W, uimm5>;
+def : PatGprGpr<int_riscv_psll_ws,     PSLL_WS>;
+def : PatGprGpr<int_riscv_padd_ws,     PADD_WS>;
+def : PatGprGpr<int_riscv_pssha_ws,    PSSHA_WS>;
+def : PatGprGpr<int_riscv_sha,         SHA>;
+def : PatGprGpr<int_riscv_psshar_ws,   PSSHAR_WS>;
+def : PatGprGpr<int_riscv_shar,        SHAR>;
+def : PatGprImm<int_riscv_psrli_w,     PSRLI_W,  uimm5>;
+def : PatGprImm<int_riscv_pusati_w,    PUSATI_W, uimm5>;
+def : PatGprImm<int_riscv_usati,       USATI_RV64, uimm6>;
+def : PatGprImm<int_riscv_psrai_w,     PSRAI_W,    uimm5>;
+def : PatGprImm<int_riscv_psrari_w,    PSRARI_W,   uimm5>;
+def : PatGprImm<int_riscv_srari,       SRARI_RV64, uimm6>;
+def : PatGprImm<int_riscv_psati_w,     PSATI_W,    uimm5>;
+def : PatGprImm<int_riscv_sati,        SATI_RV64,  uimm6>;
+def : PatGprGpr<int_riscv_psrl_ws,     PSRL_WS>;
+def : PatGprGpr<int_riscv_predsum_ws,  PREDSUM_WS>;
+def : PatGprGpr<int_riscv_predsumu_ws, PREDSUMU_WS>;
+def : PatGprGpr<int_riscv_psra_ws,     PSRA_WS>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 25419db754aef..ec2ef5de7b19e 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -244,6 +244,94 @@ define i32 @sati(i32 %a, i32 %b) {
     ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.psrl.bs.i32(i32, i32)
+
+define i32 @psrl_bs(i32 %a, i32 %b) {
+; RV32P-LABEL: psrl_bs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psrl.bs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psrl.bs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psrl.hs.i32(i32, i32)
+
+define i32 @psrl_hs(i32 %a, i32 %b) {
+; RV32P-LABEL: psrl_hs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psrl.hs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psrl.hs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.predsum.bs.i32(i32, i32)
+
+define i32 @predsum_bs(i32 %a, i32 %b) {
+; RV32P-LABEL: predsum_bs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    predsum.bs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.predsum.bs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.predsum.hs.i32(i32, i32)
+
+define i32 @predsum_hs(i32 %a, i32 %b) {
+; RV32P-LABEL: predsum_hs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    predsum.hs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.predsum.hs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.predsumu.bs.i32(i32, i32)
+
+define i32 @predsumu_bs(i32 %a, i32 %b) {
+; RV32P-LABEL: predsumu_bs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    predsumu.bs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.predsumu.bs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.predsumu.hs.i32(i32, i32)
+
+define i32 @predsumu_hs(i32 %a, i32 %b) {
+; RV32P-LABEL: predsumu_hs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    predsumu.hs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.predsumu.hs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psra.bs.i32(i32, i32)
+
+define i32 @psra_bs(i32 %a, i32 %b) {
+; RV32P-LABEL: psra_bs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psra.bs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psra.bs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psra.hs.i32(i32, i32)
+
+define i32 @psra_hs(i32 %a, i32 %b) {
+; RV32P-LABEL: psra_hs:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psra.hs a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psra.hs.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index a6ef863a75be4..b568044ca31d2 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -353,3 +353,135 @@ define i64 @sati(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.sati.i64.i64(i64 %a, i64 1)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.psrl.bs.i64(i64, i64)
+
+define i64 @psrl_bs(i64 %a, i64 %b) {
+; RV64P-LABEL: psrl_bs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrl.bs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrl.bs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psrl.hs.i64(i64, i64)
+
+define i64 @psrl_hs(i64 %a, i64 %b) {
+; RV64P-LABEL: psrl_hs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrl.hs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrl.hs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psrl.ws.i64(i64, i64)
+
+define i64 @psrl_ws(i64 %a, i64 %b) {
+; RV64P-LABEL: psrl_ws:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psrl.ws a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psrl.ws.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.predsum.bs.i64(i64, i64)
+
+define i64 @predsum_bs(i64 %a, i64 %b) {
+; RV64P-LABEL: predsum_bs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    predsum.bs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.predsum.bs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.predsum.hs.i64(i64, i64)
+
+define i64 @predsum_hs(i64 %a, i64 %b) {
+; RV64P-LABEL: predsum_hs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    predsum.hs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.predsum.hs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.predsum.ws.i64(i64, i64)
+
+define i64 @predsum_ws(i64 %a, i64 %b) {
+; RV64P-LABEL: predsum_ws:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    predsum.ws a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.predsum.ws.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.predsumu.bs.i64(i64, i64)
+
+define i64 @predsumu_bs(i64 %a, i64 %b) {
+; RV64P-LABEL: predsumu_bs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    predsumu.bs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.predsumu.bs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.predsumu.hs.i64(i64, i64)
+
+define i64 @predsumu_hs(i64 %a, i64 %b) {
+; RV64P-LABEL: predsumu_hs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    predsumu.hs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.predsumu.hs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.predsumu.ws.i64(i64, i64)
+
+define i64 @predsumu_ws(i64 %a, i64 %b) {
+; RV64P-LABEL: predsumu_ws:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    predsumu.ws a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.predsumu.ws.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psra.bs.i64(i64, i64)
+
+define i64 @psra_bs(i64 %a, i64 %b) {
+; RV64P-LABEL: psra_bs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psra.bs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psra.bs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psra.hs.i64(i64, i64)
+
+define i64 @psra_hs(i64 %a, i64 %b) {
+; RV64P-LABEL: psra_hs:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psra.hs a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psra.hs.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psra.ws.i64(i64, i64)
+
+define i64 @psra_ws(i64 %a, i64 %b) {
+; RV64P-LABEL: psra_ws:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psra.ws a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psra.ws.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From d6a8e2fc49b47975f9a48ff09e0e45bb985d70ed Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 15:35:20 +0800
Subject: [PATCH 14/40] [RISCV] Packed Addition and Saturating Addition LLVM IR
 intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  20 ++-
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  18 ++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 147 +++++++++++++++-
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 165 ++++++++++++++++++
 4 files changed, 347 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 731b0d4856975..eca7c7fc97d5a 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1924,7 +1924,6 @@ let TargetPrefix = "riscv" in {
   defm psshar_ws   : RVPBinaryIntrinsics;
   defm shar        : RVPBinaryIntrinsics;
   defm sshar       : RVPBinaryIntrinsics;
-  defm sadd        : RVPBinaryIntrinsics;
   defm psrl_bs     : RVPBinaryIntrinsics;
   defm psrl_hs     : RVPBinaryIntrinsics;
   defm psrl_ws     : RVPBinaryIntrinsics;
@@ -1937,6 +1936,25 @@ let TargetPrefix = "riscv" in {
   defm psra_bs     : RVPBinaryIntrinsics;
   defm psra_hs     : RVPBinaryIntrinsics;
   defm psra_ws     : RVPBinaryIntrinsics;
+  defm padd_b      : RVPBinaryIntrinsics;
+  defm padd_h      : RVPBinaryIntrinsics;
+  defm padd_w      : RVPBinaryIntrinsics;
+  defm sadd        : RVPBinaryIntrinsics;
+  defm psadd_b     : RVPBinaryIntrinsics;
+  defm psadd_h     : RVPBinaryIntrinsics;
+  defm psadd_w     : RVPBinaryIntrinsics;
+  defm aadd        : RVPBinaryIntrinsics;
+  defm paadd_b     : RVPBinaryIntrinsics;
+  defm paadd_h     : RVPBinaryIntrinsics;
+  defm paadd_w     : RVPBinaryIntrinsics;
+  defm saddu       : RVPBinaryIntrinsics;
+  defm psaddu_b    : RVPBinaryIntrinsics;
+  defm psaddu_h    : RVPBinaryIntrinsics;
+  defm psaddu_w    : RVPBinaryIntrinsics;
+  defm aaddu       : RVPBinaryIntrinsics;
+  defm paaddu_b    : RVPBinaryIntrinsics;
+  defm paaddu_h    : RVPBinaryIntrinsics;
+  defm paaddu_w    : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index d4ce20ddaa9b7..2954c89f1b223 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -919,6 +919,16 @@ def : PatGprGpr<int_riscv_predsumu_bs, PREDSUMU_BS>;
 def : PatGprGpr<int_riscv_predsumu_hs, PREDSUMU_HS>;
 def : PatGprGpr<int_riscv_psra_bs,     PSRA_BS>;
 def : PatGprGpr<int_riscv_psra_hs,     PSRA_HS>;
+def : PatGprGpr<int_riscv_padd_b,      PADD_B>;
+def : PatGprGpr<int_riscv_padd_h,      PADD_H>;
+def : PatGprGpr<int_riscv_psadd_b,     PSADD_B>;
+def : PatGprGpr<int_riscv_psadd_h,     PSADD_H>;
+def : PatGprGpr<int_riscv_paadd_b,     PAADD_B>;
+def : PatGprGpr<int_riscv_paadd_h,     PAADD_H>;
+def : PatGprGpr<int_riscv_psaddu_b,    PSADDU_B>;
+def : PatGprGpr<int_riscv_psaddu_h,    PSADDU_H>;
+def : PatGprGpr<int_riscv_paaddu_b,    PAADDU_B>;
+def : PatGprGpr<int_riscv_paaddu_h,    PAADDU_H>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -929,6 +939,9 @@ def : PatGprImm<int_riscv_usati, USATI_RV32, uimm5>;
 def : PatGprImm<int_riscv_srari, SRARI_RV32, uimm5>;
 def : PatGprImm<int_riscv_sati,  SATI_RV32,  uimm5>;
 def : PatGprGpr<int_riscv_sadd,  SADD>;
+def : PatGprGpr<int_riscv_aadd,  AADD>;
+def : PatGprGpr<int_riscv_saddu, SADDU>;
+def : PatGprGpr<int_riscv_aaddu, AADDU>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
@@ -952,4 +965,9 @@ def : PatGprGpr<int_riscv_psrl_ws,     PSRL_WS>;
 def : PatGprGpr<int_riscv_predsum_ws,  PREDSUM_WS>;
 def : PatGprGpr<int_riscv_predsumu_ws, PREDSUMU_WS>;
 def : PatGprGpr<int_riscv_psra_ws,     PSRA_WS>;
+def : PatGprGpr<int_riscv_padd_w,      PADD_W>;
+def : PatGprGpr<int_riscv_psadd_w,     PSADD_W>;
+def : PatGprGpr<int_riscv_paadd_w,     PAADD_W>;
+def : PatGprGpr<int_riscv_psaddu_w,    PSADDU_W>;
+def : PatGprGpr<int_riscv_paaddu_w,    PAADDU_W>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index ec2ef5de7b19e..eedada0229afe 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -332,6 +332,28 @@ define i32 @psra_hs(i32 %a, i32 %b) {
     ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.padd.b.i32(i32, i32)
+
+define i32 @padd_b(i32 %a, i32 %b) {
+; RV32P-LABEL: padd_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    padd.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.padd.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.padd.h.i32(i32, i32)
+
+define i32 @padd_h(i32 %a, i32 %b) {
+; RV32P-LABEL: padd_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    padd.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.padd.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
 declare i32 @llvm.riscv.sadd.i32(i32, i32)
 
 define i32 @sadd(i32 %a, i32 %b) {
@@ -339,6 +361,127 @@ define i32 @sadd(i32 %a, i32 %b) {
 ; RV32P:       # %bb.0:
 ; RV32P-NEXT:    sadd a0, a0, a1
 ; RV32P-NEXT:    ret
-  %tmp = call i32 @llvm.riscv.sadd.i32(i32 %a, i32 %b)
-  ret i32 %tmp
+    %tmp = call i32 @llvm.riscv.sadd.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psadd.b.i32(i32, i32)
+
+define i32 @psadd_b(i32 %a, i32 %b) {
+; RV32P-LABEL: psadd_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psadd.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psadd.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psadd.h.i32(i32, i32)
+
+define i32 @psadd_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psadd_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psadd.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psadd.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.aadd.i32(i32, i32)
+
+define i32 @aadd(i32 %a, i32 %b) {
+; RV32P-LABEL: aadd:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    aadd a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.aadd.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.paadd.b.i32(i32, i32)
+
+define i32 @paadd_b(i32 %a, i32 %b) {
+; RV32P-LABEL: paadd_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    paadd.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.paadd.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.paadd.h.i32(i32, i32)
+
+define i32 @paadd_h(i32 %a, i32 %b) {
+; RV32P-LABEL: paadd_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    paadd.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.paadd.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.saddu.i32(i32, i32)
+
+define i32 @saddu(i32 %a, i32 %b) {
+; RV32P-LABEL: saddu:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    saddu a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.saddu.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psaddu.b.i32(i32, i32)
+
+define i32 @psaddu_b(i32 %a, i32 %b) {
+; RV32P-LABEL: psaddu_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psaddu.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psaddu.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psaddu.h.i32(i32, i32)
+
+define i32 @psaddu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psaddu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psaddu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psaddu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.aaddu.i32(i32, i32)
+
+define i32 @aaddu(i32 %a, i32 %b) {
+; RV32P-LABEL: aaddu:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    aaddu a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.aaddu.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.paaddu.b.i32(i32, i32)
+
+define i32 @paaddu_b(i32 %a, i32 %b) {
+; RV32P-LABEL: paaddu_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    paaddu.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.paaddu.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.paaddu.h.i32(i32, i32)
+
+define i32 @paaddu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: paaddu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    paaddu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.paaddu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
 }
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index b568044ca31d2..5c5f030e8dbcd 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -485,3 +485,168 @@ define i64 @psra_ws(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.psra.ws.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.padd.b.i64(i64, i64)
+
+define i64 @padd_b(i64 %a, i64 %b) {
+; RV64P-LABEL: padd_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    padd.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.padd.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.padd.h.i64(i64, i64)
+
+define i64 @padd_h(i64 %a, i64 %b) {
+; RV64P-LABEL: padd_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    padd.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.padd.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.padd.w.i64(i64, i64)
+
+define i64 @padd_w(i64 %a, i64 %b) {
+; RV64P-LABEL: padd_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    padd.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.padd.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psadd.b.i64(i64, i64)
+
+define i64 @psadd_b(i64 %a, i64 %b) {
+; RV64P-LABEL: psadd_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psadd.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psadd.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psadd.h.i64(i64, i64)
+
+define i64 @psadd_h(i64 %a, i64 %b) {
+; RV64P-LABEL: psadd_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psadd.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psadd.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psadd.w.i64(i64, i64)
+
+define i64 @psadd_w(i64 %a, i64 %b) {
+; RV64P-LABEL: psadd_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psadd.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psadd.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.paadd.b.i64(i64, i64)
+
+define i64 @paadd_b(i64 %a, i64 %b) {
+; RV64P-LABEL: paadd_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    paadd.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.paadd.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.paadd.h.i64(i64, i64)
+
+define i64 @paadd_h(i64 %a, i64 %b) {
+; RV64P-LABEL: paadd_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    paadd.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.paadd.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.paadd.w.i64(i64, i64)
+
+define i64 @paadd_w(i64 %a, i64 %b) {
+; RV64P-LABEL: paadd_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    paadd.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.paadd.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psaddu.b.i64(i64, i64)
+
+define i64 @psaddu_b(i64 %a, i64 %b) {
+; RV64P-LABEL: psaddu_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psaddu.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psaddu.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psaddu.h.i64(i64, i64)
+
+define i64 @psaddu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: psaddu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psaddu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psaddu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psaddu.w.i64(i64, i64)
+
+define i64 @psaddu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: psaddu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psaddu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psaddu.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.paaddu.b.i64(i64, i64)
+
+define i64 @paaddu_b(i64 %a, i64 %b) {
+; RV64P-LABEL: paaddu_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    paaddu.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.paaddu.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.paaddu.h.i64(i64, i64)
+
+define i64 @paaddu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: paaddu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    paaddu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.paaddu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.paaddu.w.i64(i64, i64)
+
+define i64 @paaddu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: paaddu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    paaddu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.paaddu.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From 5ffc37f82be127fba28f8dc80d84a3129e3eb8ef Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 15:40:07 +0800
Subject: [PATCH 15/40] [RISCV] Packed Subtraction and Saturating Subtraction
 LLVM IR intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  19 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  19 ++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 154 ++++++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 165 ++++++++++++++++++
 4 files changed, 357 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index eca7c7fc97d5a..1359b2e98a71b 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1955,6 +1955,25 @@ let TargetPrefix = "riscv" in {
   defm paaddu_b    : RVPBinaryIntrinsics;
   defm paaddu_h    : RVPBinaryIntrinsics;
   defm paaddu_w    : RVPBinaryIntrinsics;
+  defm psub_b      : RVPBinaryIntrinsics;
+  defm psub_h      : RVPBinaryIntrinsics;
+  defm psub_w      : RVPBinaryIntrinsics;
+  defm ssub        : RVPBinaryIntrinsics;
+  defm pssub_b     : RVPBinaryIntrinsics;
+  defm pssub_h     : RVPBinaryIntrinsics;
+  defm pssub_w     : RVPBinaryIntrinsics;
+  defm asub        : RVPBinaryIntrinsics;
+  defm pasub_b     : RVPBinaryIntrinsics;
+  defm pasub_h     : RVPBinaryIntrinsics;
+  defm pasub_w     : RVPBinaryIntrinsics;
+  defm ssubu       : RVPBinaryIntrinsics;
+  defm pssubu_b    : RVPBinaryIntrinsics;
+  defm pssubu_h    : RVPBinaryIntrinsics;
+  defm pssubu_w    : RVPBinaryIntrinsics;
+  defm asubu       : RVPBinaryIntrinsics;
+  defm pasubu_b    : RVPBinaryIntrinsics;
+  defm pasubu_h    : RVPBinaryIntrinsics;
+  defm pasubu_w    : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 2954c89f1b223..61235a043b506 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -929,6 +929,16 @@ def : PatGprGpr<int_riscv_psaddu_b,    PSADDU_B>;
 def : PatGprGpr<int_riscv_psaddu_h,    PSADDU_H>;
 def : PatGprGpr<int_riscv_paaddu_b,    PAADDU_B>;
 def : PatGprGpr<int_riscv_paaddu_h,    PAADDU_H>;
+def : PatGprGpr<int_riscv_psub_b,      PSUB_B>;
+def : PatGprGpr<int_riscv_psub_h,      PSUB_H>;
+def : PatGprGpr<int_riscv_pssub_b,     PSSUB_B>;
+def : PatGprGpr<int_riscv_pssub_h,     PSSUB_H>;
+def : PatGprGpr<int_riscv_pasub_b,     PASUB_B>;
+def : PatGprGpr<int_riscv_pasub_h,     PASUB_H>;
+def : PatGprGpr<int_riscv_pssubu_b,    PSSUBU_B>;
+def : PatGprGpr<int_riscv_pssubu_h,    PSSUBU_H>;
+def : PatGprGpr<int_riscv_pasubu_b,    PASUBU_B>;
+def : PatGprGpr<int_riscv_pasubu_h,    PASUBU_H>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -942,6 +952,10 @@ def : PatGprGpr<int_riscv_sadd,  SADD>;
 def : PatGprGpr<int_riscv_aadd,  AADD>;
 def : PatGprGpr<int_riscv_saddu, SADDU>;
 def : PatGprGpr<int_riscv_aaddu, AADDU>;
+def : PatGprGpr<int_riscv_ssub,  SSUB>;
+def : PatGprGpr<int_riscv_asub,  ASUB>;
+def : PatGprGpr<int_riscv_ssubu, SSUBU>;
+def : PatGprGpr<int_riscv_asubu, ASUBU>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
@@ -970,4 +984,9 @@ def : PatGprGpr<int_riscv_psadd_w,     PSADD_W>;
 def : PatGprGpr<int_riscv_paadd_w,     PAADD_W>;
 def : PatGprGpr<int_riscv_psaddu_w,    PSADDU_W>;
 def : PatGprGpr<int_riscv_paaddu_w,    PAADDU_W>;
+def : PatGprGpr<int_riscv_psub_w,      PSUB_W>;
+def : PatGprGpr<int_riscv_pssub_w,     PSSUB_W>;
+def : PatGprGpr<int_riscv_pasub_w,     PASUB_W>;
+def : PatGprGpr<int_riscv_pssubu_w,    PSSUBU_W>;
+def : PatGprGpr<int_riscv_pasubu_w,    PASUBU_W>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index eedada0229afe..95d9955bebda3 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -485,3 +485,157 @@ define i32 @paaddu_h(i32 %a, i32 %b) {
     %tmp = call i32 @llvm.riscv.paaddu.h.i32(i32 %a, i32 %b)
     ret i32 %tmp
 }
+
+declare i32 @llvm.riscv.psub.b.i32(i32, i32)
+
+define i32 @psub_b(i32 %a, i32 %b) {
+; RV32P-LABEL: psub_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psub.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psub.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psub.h.i32(i32, i32)
+
+define i32 @psub_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psub_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psub.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psub.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.ssub.i32(i32, i32)
+
+define i32 @ssub(i32 %a, i32 %b) {
+; RV32P-LABEL: ssub:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    ssub a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.ssub.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pssub.b.i32(i32, i32)
+
+define i32 @pssub_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pssub_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pssub.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pssub.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pssub.h.i32(i32, i32)
+
+define i32 @pssub_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pssub_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pssub.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pssub.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.asub.i32(i32, i32)
+
+define i32 @asub(i32 %a, i32 %b) {
+; RV32P-LABEL: asub:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    asub a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.asub.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pasub.b.i32(i32, i32)
+
+define i32 @pasub_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pasub_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pasub.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pasub.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pasub.h.i32(i32, i32)
+
+define i32 @pasub_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pasub_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pasub.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pasub.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.ssubu.i32(i32, i32)
+
+define i32 @ssubu(i32 %a, i32 %b) {
+; RV32P-LABEL: ssubu:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    ssubu a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.ssubu.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pssubu.b.i32(i32, i32)
+
+define i32 @pssubu_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pssubu_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pssubu.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pssubu.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pssubu.h.i32(i32, i32)
+
+define i32 @pssubu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pssubu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pssubu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pssubu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.asubu.i32(i32, i32)
+
+define i32 @asubu(i32 %a, i32 %b) {
+; RV32P-LABEL: asubu:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    asubu a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.asubu.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pasubu.b.i32(i32, i32)
+
+define i32 @pasubu_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pasubu_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pasubu.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pasubu.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pasubu.h.i32(i32, i32)
+
+define i32 @pasubu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pasubu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pasubu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pasubu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 5c5f030e8dbcd..097fe7a9acdb6 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -650,3 +650,168 @@ define i64 @paaddu_w(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.paaddu.w.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.psub.b.i64(i64, i64)
+
+define i64 @psub_b(i64 %a, i64 %b) {
+; RV64P-LABEL: psub_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psub.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psub.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psub.h.i64(i64, i64)
+
+define i64 @psub_h(i64 %a, i64 %b) {
+; RV64P-LABEL: psub_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psub.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psub.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psub.w.i64(i64, i64)
+
+define i64 @psub_w(i64 %a, i64 %b) {
+; RV64P-LABEL: psub_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psub.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psub.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssub.b.i64(i64, i64)
+
+define i64 @pssub_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pssub_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssub.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssub.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssub.h.i64(i64, i64)
+
+define i64 @pssub_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pssub_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssub.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssub.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssub.w.i64(i64, i64)
+
+define i64 @pssub_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pssub_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssub.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssub.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pasub.b.i64(i64, i64)
+
+define i64 @pasub_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pasub_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pasub.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pasub.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pasub.h.i64(i64, i64)
+
+define i64 @pasub_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pasub_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pasub.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pasub.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pasub.w.i64(i64, i64)
+
+define i64 @pasub_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pasub_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pasub.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pasub.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssubu.b.i64(i64, i64)
+
+define i64 @pssubu_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pssubu_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssubu.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssubu.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssubu.h.i64(i64, i64)
+
+define i64 @pssubu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pssubu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssubu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssubu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssubu.w.i64(i64, i64)
+
+define i64 @pssubu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pssubu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssubu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssubu.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pasubu.b.i64(i64, i64)
+
+define i64 @pasubu_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pasubu_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pasubu.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pasubu.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pasubu.h.i64(i64, i64)
+
+define i64 @pasubu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pasubu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pasubu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pasubu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pasubu.w.i64(i64, i64)
+
+define i64 @pasubu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pasubu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pasubu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pasubu.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From aa91164f6ac6a11741b5631c6b401e6621bac887 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 16:50:28 +0800
Subject: [PATCH 16/40] [RISCV] Add some P-ext LLVM IR intrinsics

Packed Difference intrinsics
Packed Shift Left and Shift Right intrinsics (slx only; srx is not included)
Packed Multiplication intrinsics
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  25 +++-
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  41 ++++--
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 100 +++++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 121 ++++++++++++++++++
 4 files changed, 272 insertions(+), 15 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 1359b2e98a71b..6a5063e7931e3 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1974,6 +1974,15 @@ let TargetPrefix = "riscv" in {
   defm pasubu_b    : RVPBinaryIntrinsics;
   defm pasubu_h    : RVPBinaryIntrinsics;
   defm pasubu_w    : RVPBinaryIntrinsics;
+  defm pdif_b      : RVPBinaryIntrinsics;
+  defm pdif_h      : RVPBinaryIntrinsics;
+  defm pdifu_b     : RVPBinaryIntrinsics;
+  defm pdifu_h     : RVPBinaryIntrinsics;
+  defm slx         : RVPBinaryIntrinsics;
+  defm mul_h01     : RVPBinaryIntrinsics;
+  defm mul_w01     : RVPBinaryIntrinsics;
+  defm mulu_h01    : RVPBinaryIntrinsics;
+  defm mulu_w01    : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
@@ -1984,7 +1993,7 @@ let TargetPrefix = "riscv" in {
     def "int_riscv_" # NAME   : RVPBinaryAABIntrinsics;
   }
 
-      defm pslli_b  : RVPBinaryAABIntrinsics;
+  defm pslli_b  : RVPBinaryAABIntrinsics;
   defm pslli_h  : RVPBinaryAABIntrinsics;
   defm pslli_w  : RVPBinaryAABIntrinsics;
   defm psslai_h : RVPBinaryAABIntrinsics;
@@ -2005,6 +2014,20 @@ let TargetPrefix = "riscv" in {
   defm psati_h  : RVPBinaryAABIntrinsics;
   defm psati_w  : RVPBinaryAABIntrinsics;
   defm sati     : RVPBinaryAABIntrinsics;
+
+  class RVPBinaryABBIntrinsics
+      : Intrinsic<[llvm_any_ty],
+                  [LLVMMatchType<0>, llvm_any_ty],
+                  [IntrNoMem]>;
+
+  multiclass RVPBinaryABBIntrinsics {
+    def "int_riscv_" # NAME   : RVPBinaryABBIntrinsics;
+  }
+
+  defm pmul_h_b01  : RVPBinaryABBIntrinsics;
+  defm pmul_w_h01  : RVPBinaryABBIntrinsics;
+  defm pmulu_h_b01 : RVPBinaryABBIntrinsics;
+  defm pmulu_w_h01 : RVPBinaryABBIntrinsics;
 } // TargetPrefix = "riscv"
 
 // Vendor extensions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 61235a043b506..a987cd801c8a7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -939,23 +939,32 @@ def : PatGprGpr<int_riscv_pssubu_b,    PSSUBU_B>;
 def : PatGprGpr<int_riscv_pssubu_h,    PSSUBU_H>;
 def : PatGprGpr<int_riscv_pasubu_b,    PASUBU_B>;
 def : PatGprGpr<int_riscv_pasubu_h,    PASUBU_H>;
+def : PatGprGpr<int_riscv_pdif_b,      PDIF_B>;
+def : PatGprGpr<int_riscv_pdif_h,      PDIF_H>;
+def : PatGprGpr<int_riscv_pdifu_b,     PDIFU_B>;
+def : PatGprGpr<int_riscv_pdifu_h,     PDIFU_H>;
+def : PatGprGpr<int_riscv_slx,         SLX>;
+def : PatGprGpr<int_riscv_pmul_h_b01,  PMUL_H_B01>;
+def : PatGprGpr<int_riscv_pmulu_h_b01, PMULU_H_B01>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
-def : PatGprImm<int_riscv_sslai, SSLAI, uimm5>;
-def : PatGprGpr<int_riscv_ssha,  SSHA>;
-def : PatGprGpr<int_riscv_sshar, SSHAR>;
-def : PatGprImm<int_riscv_usati, USATI_RV32, uimm5>;
-def : PatGprImm<int_riscv_srari, SRARI_RV32, uimm5>;
-def : PatGprImm<int_riscv_sati,  SATI_RV32,  uimm5>;
-def : PatGprGpr<int_riscv_sadd,  SADD>;
-def : PatGprGpr<int_riscv_aadd,  AADD>;
-def : PatGprGpr<int_riscv_saddu, SADDU>;
-def : PatGprGpr<int_riscv_aaddu, AADDU>;
-def : PatGprGpr<int_riscv_ssub,  SSUB>;
-def : PatGprGpr<int_riscv_asub,  ASUB>;
-def : PatGprGpr<int_riscv_ssubu, SSUBU>;
-def : PatGprGpr<int_riscv_asubu, ASUBU>;
+def : PatGprImm<int_riscv_sslai,    SSLAI, uimm5>;
+def : PatGprGpr<int_riscv_ssha,     SSHA>;
+def : PatGprGpr<int_riscv_sshar,    SSHAR>;
+def : PatGprImm<int_riscv_usati,    USATI_RV32, uimm5>;
+def : PatGprImm<int_riscv_srari,    SRARI_RV32, uimm5>;
+def : PatGprImm<int_riscv_sati,     SATI_RV32,  uimm5>;
+def : PatGprGpr<int_riscv_sadd,     SADD>;
+def : PatGprGpr<int_riscv_aadd,     AADD>;
+def : PatGprGpr<int_riscv_saddu,    SADDU>;
+def : PatGprGpr<int_riscv_aaddu,    AADDU>;
+def : PatGprGpr<int_riscv_ssub,     SSUB>;
+def : PatGprGpr<int_riscv_asub,     ASUB>;
+def : PatGprGpr<int_riscv_ssubu,    SSUBU>;
+def : PatGprGpr<int_riscv_asubu,    ASUBU>;
+def : PatGprGpr<int_riscv_mul_h01,  MUL_H01>;
+def : PatGprGpr<int_riscv_mulu_h01, MULU_H01>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
@@ -989,4 +998,8 @@ def : PatGprGpr<int_riscv_pssub_w,     PSSUB_W>;
 def : PatGprGpr<int_riscv_pasub_w,     PASUB_W>;
 def : PatGprGpr<int_riscv_pssubu_w,    PSSUBU_W>;
 def : PatGprGpr<int_riscv_pasubu_w,    PASUBU_W>;
+def : PatGprGpr<int_riscv_pmul_w_h01,  PMUL_W_H01>;
+def : PatGprGpr<int_riscv_pmulu_w_h01, PMULU_W_H01>;
+def : PatGprGpr<int_riscv_mul_w01,     MUL_W01>;
+def : PatGprGpr<int_riscv_mulu_w01,    MULU_W01>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 95d9955bebda3..f53be830463f3 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -639,3 +639,103 @@ define i32 @pasubu_h(i32 %a, i32 %b) {
     %tmp = call i32 @llvm.riscv.pasubu.h.i32(i32 %a, i32 %b)
     ret i32 %tmp
 }
+
+declare i32 @llvm.riscv.pdif.b.i32(i32, i32)
+
+define i32 @pdif_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pdif_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pdif.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pdif.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pdif.h.i32(i32, i32)
+
+define i32 @pdif_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pdif_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pdif.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pdif.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pdifu.b.i32(i32, i32)
+
+define i32 @pdifu_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pdifu_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pdifu.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pdifu.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pdifu.h.i32(i32, i32)
+
+define i32 @pdifu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pdifu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pdifu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pdifu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.slx.i32(i32, i32)
+
+define i32 @slx(i32 %a, i32 %b) {
+; RV32P-LABEL: slx:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    slx a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.slx.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmul.h.b01.i32.i32(i32, i32)
+
+define i32 @pmul_h_b01(i32 %a, i32 %b) {
+; RV32P-LABEL: pmul_h_b01:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmul.h.b01 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmul.h.b01.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulu.h.b01.i32.i32(i32, i32)
+
+define i32 @pmulu_h_b01(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulu_h_b01:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulu.h.b01 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulu.h.b01.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mul.h01.i32(i32, i32)
+
+define i32 @mul_h01(i32 %a, i32 %b) {
+; RV32P-LABEL: mul_h01:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mul.h01 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mul.h01.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulu.h01.i32(i32, i32)
+
+define i32 @mulu_h01(i32 %a, i32 %b) {
+; RV32P-LABEL: mulu_h01:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulu.h01 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulu.h01.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 097fe7a9acdb6..f51984d3a1feb 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -815,3 +815,124 @@ define i64 @pasubu_w(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.pasubu.w.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.pdif.b.i64(i64, i64)
+
+define i64 @pdif_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pdif_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pdif.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pdif.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pdif.h.i64(i64, i64)
+
+define i64 @pdif_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pdif_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pdif.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pdif.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pdifu.b.i64(i64, i64)
+
+define i64 @pdifu_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pdifu_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pdifu.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pdifu.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pdifu.h.i64(i64, i64)
+
+define i64 @pdifu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pdifu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pdifu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pdifu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.slx.i64(i64, i64)
+
+define i64 @slx(i64 %a, i64 %b) {
+; RV64P-LABEL: slx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    slx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.slx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmul.h.b01.i64.i64(i64, i64)
+
+define i64 @pmul_h_b01(i64 %a, i64 %b) {
+; RV64P-LABEL: pmul_h_b01:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmul.h.b01 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmul.h.b01.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmul.w.h01.i64.i64(i64, i64)
+
+define i64 @pmul_w_h01(i64 %a, i64 %b) {
+; RV64P-LABEL: pmul_w_h01:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmul.w.h01 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmul.w.h01.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulu.h.b01.i64.i64(i64, i64)
+
+define i64 @pmulu_h_b01(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulu_h_b01:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulu.h.b01 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulu.h.b01.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulu.w.h01.i64.i64(i64, i64)
+
+define i64 @pmulu_w_h01(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulu_w_h01:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulu.w.h01 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulu.w.h01.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.mul.w01.i64(i64, i64)
+
+define i64 @mul_w01(i64 %a, i64 %b) {
+; RV64P-LABEL: mul_w01:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    mul.w01 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.mul.w01.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.mulu.w01.i64(i64, i64)
+
+define i64 @mulu_w01(i64 %a, i64 %b) {
+; RV64P-LABEL: mulu_w01:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    mulu.w01 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.mulu.w01.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From 8a8eac3f4a7fbaf9e9bfe386236ec567fa29dcc2 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 17:06:51 +0800
Subject: [PATCH 17/40] [RISCV] Add some P-ext LLVM IR intrinsics

Packed Shift-and-Add (SH1ADD) Intrinsics
Packed Zip and Unzip Intrinsics
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  13 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  13 ++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll |  32 +++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 132 ++++++++++++++++++
 4 files changed, 190 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 6a5063e7931e3..ff023ccddbb1e 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1983,6 +1983,19 @@ let TargetPrefix = "riscv" in {
   defm mul_w01     : RVPBinaryIntrinsics;
   defm mulu_h01    : RVPBinaryIntrinsics;
   defm mulu_w01    : RVPBinaryIntrinsics;
+  defm psh1add_h   : RVPBinaryIntrinsics;
+  defm psh1add_w   : RVPBinaryIntrinsics;
+  defm ssh1sadd    : RVPBinaryIntrinsics;
+  defm pssh1sadd_h : RVPBinaryIntrinsics;
+  defm pssh1sadd_w : RVPBinaryIntrinsics;
+  defm unzip8p     : RVPBinaryIntrinsics;
+  defm unzip16p    : RVPBinaryIntrinsics;
+  defm unzip8hp    : RVPBinaryIntrinsics;
+  defm unzip16hp   : RVPBinaryIntrinsics;
+  defm zip8p       : RVPBinaryIntrinsics;
+  defm zip16p      : RVPBinaryIntrinsics;
+  defm zip8hp      : RVPBinaryIntrinsics;
+  defm zip16hp     : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index a987cd801c8a7..45e4f6dfd452c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -946,6 +946,8 @@ def : PatGprGpr<int_riscv_pdifu_h,     PDIFU_H>;
 def : PatGprGpr<int_riscv_slx,         SLX>;
 def : PatGprGpr<int_riscv_pmul_h_b01,  PMUL_H_B01>;
 def : PatGprGpr<int_riscv_pmulu_h_b01, PMULU_H_B01>;
+def : PatGprGpr<int_riscv_psh1add_h,   PSH1ADD_H>;
+def : PatGprGpr<int_riscv_pssh1sadd_h, PSSH1SADD_H>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -965,6 +967,7 @@ def : PatGprGpr<int_riscv_ssubu,    SSUBU>;
 def : PatGprGpr<int_riscv_asubu,    ASUBU>;
 def : PatGprGpr<int_riscv_mul_h01,  MUL_H01>;
 def : PatGprGpr<int_riscv_mulu_h01, MULU_H01>;
+def : PatGprGpr<int_riscv_ssh1sadd, SSH1SADD>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
@@ -1002,4 +1005,14 @@ def : PatGprGpr<int_riscv_pmul_w_h01,  PMUL_W_H01>;
 def : PatGprGpr<int_riscv_pmulu_w_h01, PMULU_W_H01>;
 def : PatGprGpr<int_riscv_mul_w01,     MUL_W01>;
 def : PatGprGpr<int_riscv_mulu_w01,    MULU_W01>;
+def : PatGprGpr<int_riscv_psh1add_w,   PSH1ADD_W>;
+def : PatGprGpr<int_riscv_pssh1sadd_w, PSSH1SADD_W>;
+def : PatGprGpr<int_riscv_unzip8p,     UNZIP8P>;
+def : PatGprGpr<int_riscv_unzip16p,    UNZIP16P>;
+def : PatGprGpr<int_riscv_unzip8hp,    UNZIP8HP>;
+def : PatGprGpr<int_riscv_unzip16hp,   UNZIP16HP>;
+def : PatGprGpr<int_riscv_zip8p,       ZIP8P>;
+def : PatGprGpr<int_riscv_zip16p,      ZIP16P>;
+def : PatGprGpr<int_riscv_zip8hp,      ZIP8HP>;
+def : PatGprGpr<int_riscv_zip16hp,     ZIP16HP>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index f53be830463f3..099b56776c95d 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -739,3 +739,35 @@ define i32 @mulu_h01(i32 %a, i32 %b) {
     ret i32 %tmp
 }
 
+declare i32 @llvm.riscv.psh1add.h.i32(i32, i32)
+
+define i32 @psh1add_h(i32 %a, i32 %b) {
+; RV32P-LABEL: psh1add_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psh1add.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psh1add.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.ssh1sadd.i32(i32, i32)
+
+define i32 @ssh1sadd(i32 %a, i32 %b) {
+; RV32P-LABEL: ssh1sadd:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    ssh1sadd a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.ssh1sadd.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pssh1sadd.h.i32(i32, i32)
+
+define i32 @pssh1sadd_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pssh1sadd_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pssh1sadd.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pssh1sadd.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index f51984d3a1feb..c00611d29aa0c 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -936,3 +936,135 @@ define i64 @mulu_w01(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.mulu.w01.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.psh1add.h.i64(i64, i64)
+
+define i64 @psh1add_h(i64 %a, i64 %b) {
+; RV64P-LABEL: psh1add_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psh1add.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psh1add.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psh1add.w.i64(i64, i64)
+
+define i64 @psh1add_w(i64 %a, i64 %b) {
+; RV64P-LABEL: psh1add_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psh1add.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psh1add.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssh1sadd.h.i64(i64, i64)
+
+define i64 @pssh1sadd_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pssh1sadd_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssh1sadd.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssh1sadd.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssh1sadd.w.i64(i64, i64)
+
+define i64 @pssh1sadd_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pssh1sadd_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssh1sadd.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssh1sadd.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.unzip8p.i64(i64, i64)
+
+define i64 @unzip8p(i64 %a, i64 %b) {
+; RV64P-LABEL: unzip8p:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    unzip8p a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.unzip8p.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.unzip16p.i64(i64, i64)
+
+define i64 @unzip16p(i64 %a, i64 %b) {
+; RV64P-LABEL: unzip16p:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    unzip16p a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.unzip16p.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.unzip8hp.i64(i64, i64)
+
+define i64 @unzip8hp(i64 %a, i64 %b) {
+; RV64P-LABEL: unzip8hp:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    unzip8hp a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.unzip8hp.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.unzip16hp.i64(i64, i64)
+
+define i64 @unzip16hp(i64 %a, i64 %b) {
+; RV64P-LABEL: unzip16hp:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    unzip16hp a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.unzip16hp.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.zip8p.i64(i64, i64)
+
+define i64 @zip8p(i64 %a, i64 %b) {
+; RV64P-LABEL: zip8p:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    zip8p a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.zip8p.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.zip16p.i64(i64, i64)
+
+define i64 @zip16p(i64 %a, i64 %b) {
+; RV64P-LABEL: zip16p:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    zip16p a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.zip16p.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.zip8hp.i64(i64, i64)
+
+define i64 @zip8hp(i64 %a, i64 %b) {
+; RV64P-LABEL: zip8hp:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    zip8hp a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.zip8hp.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.zip16hp.i64(i64, i64)
+
+define i64 @zip16hp(i64 %a, i64 %b) {
+; RV64P-LABEL: zip16hp:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    zip16hp a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.zip16hp.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From 5c239ddc2ed93b5360f4832d8d1c8a8010fff4d1 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 17:17:31 +0800
Subject: [PATCH 18/40] [RISCV] Packed Multiply LLVM IR Intrinsics (Lane
 Variants 00 and 11)

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  34 +++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      | 252 ++++++++++--------
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 132 +++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 198 ++++++++++++++
 4 files changed, 502 insertions(+), 114 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index ff023ccddbb1e..793688782c271 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -2041,6 +2041,40 @@ let TargetPrefix = "riscv" in {
   defm pmul_w_h01  : RVPBinaryABBIntrinsics;
   defm pmulu_h_b01 : RVPBinaryABBIntrinsics;
   defm pmulu_w_h01 : RVPBinaryABBIntrinsics;
+  defm pmul_h_b00  : RVPBinaryABBIntrinsics;
+  defm pmul_w_h00  : RVPBinaryABBIntrinsics;
+  defm pmul_h_b11  : RVPBinaryABBIntrinsics;
+  defm pmul_w_h11  : RVPBinaryABBIntrinsics;
+  defm pmulu_h_b00 : RVPBinaryABBIntrinsics;
+  defm pmulu_w_h00 : RVPBinaryABBIntrinsics;
+  defm pmulu_h_b11 : RVPBinaryABBIntrinsics;
+  defm pmulu_w_h11 : RVPBinaryABBIntrinsics;
+  defm mul_h00     : RVPBinaryABBIntrinsics;
+  defm mul_w00     : RVPBinaryABBIntrinsics;
+  defm mul_h11     : RVPBinaryABBIntrinsics;
+  defm mul_w11     : RVPBinaryABBIntrinsics;
+  defm mulu_h00    : RVPBinaryABBIntrinsics;
+  defm mulu_w00    : RVPBinaryABBIntrinsics;
+  defm mulu_h11    : RVPBinaryABBIntrinsics;
+  defm mulu_w11    : RVPBinaryABBIntrinsics;
+
+  class RVPBinaryABCIntrinsics
+      : Intrinsic<[llvm_any_ty],
+                  [llvm_any_ty, llvm_any_ty],
+                  [IntrNoMem]>;
+
+  multiclass RVPBinaryABCIntrinsics {
+    def "int_riscv_" # NAME   : RVPBinaryABCIntrinsics;
+  }
+
+  defm pmulsu_h_b00      : RVPBinaryABCIntrinsics;
+  defm pmulsu_w_h00      : RVPBinaryABCIntrinsics;
+  defm pmulsu_h_b11      : RVPBinaryABCIntrinsics;
+  defm pmulsu_w_h11      : RVPBinaryABCIntrinsics;
+  defm mulsu_h00         : RVPBinaryABCIntrinsics;
+  defm mulsu_w00         : RVPBinaryABCIntrinsics;
+  defm mulsu_h11         : RVPBinaryABCIntrinsics;
+  defm mulsu_w11         : RVPBinaryABCIntrinsics;
 } // TargetPrefix = "riscv"
 
 // Vendor extensions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 45e4f6dfd452c..85720047bd1a7 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -895,124 +895,148 @@ let Predicates = [HasStdExtP, IsRV32] in {
 //===----------------------------------------------------------------------===//
 
 let Predicates = [HasStdExtP] in {
-def : PatGprImm<int_riscv_pslli_b,     PSLLI_B,  uimm3>;
-def : PatGprImm<int_riscv_pslli_h,     PSLLI_H,  uimm4>;
-def : PatGprImm<int_riscv_psslai_h,    PSSLAI_H, uimm4>;
-def : PatGprGpr<int_riscv_psll_bs,     PSLL_BS>;
-def : PatGprGpr<int_riscv_psll_hs,     PSLL_HS>;
-def : PatGprGpr<int_riscv_padd_bs,     PADD_BS>;
-def : PatGprGpr<int_riscv_padd_hs,     PADD_HS>;
-def : PatGprGpr<int_riscv_pssha_hs,    PSSHA_HS>;
-def : PatGprGpr<int_riscv_psshar_hs,   PSSHAR_HS>;
-def : PatGprImm<int_riscv_psrli_b,     PSRLI_B,  uimm3>;
-def : PatGprImm<int_riscv_psrli_h,     PSRLI_H,  uimm4>;
-def : PatGprImm<int_riscv_pusati_h,    PUSATI_H, uimm4>;
-def : PatGprImm<int_riscv_psrai_b,     PSRAI_B,  uimm3>;
-def : PatGprImm<int_riscv_psrai_h,     PSRAI_H,  uimm4>;
-def : PatGprImm<int_riscv_psrari_h,    PSRARI_H, uimm4>;
-def : PatGprImm<int_riscv_psati_h,     PSATI_H,  uimm4>;
-def : PatGprGpr<int_riscv_psrl_bs,     PSRL_BS>;
-def : PatGprGpr<int_riscv_psrl_hs,     PSRL_HS>;
-def : PatGprGpr<int_riscv_predsum_bs,  PREDSUM_BS>;
-def : PatGprGpr<int_riscv_predsum_hs,  PREDSUM_HS>;
-def : PatGprGpr<int_riscv_predsumu_bs, PREDSUMU_BS>;
-def : PatGprGpr<int_riscv_predsumu_hs, PREDSUMU_HS>;
-def : PatGprGpr<int_riscv_psra_bs,     PSRA_BS>;
-def : PatGprGpr<int_riscv_psra_hs,     PSRA_HS>;
-def : PatGprGpr<int_riscv_padd_b,      PADD_B>;
-def : PatGprGpr<int_riscv_padd_h,      PADD_H>;
-def : PatGprGpr<int_riscv_psadd_b,     PSADD_B>;
-def : PatGprGpr<int_riscv_psadd_h,     PSADD_H>;
-def : PatGprGpr<int_riscv_paadd_b,     PAADD_B>;
-def : PatGprGpr<int_riscv_paadd_h,     PAADD_H>;
-def : PatGprGpr<int_riscv_psaddu_b,    PSADDU_B>;
-def : PatGprGpr<int_riscv_psaddu_h,    PSADDU_H>;
-def : PatGprGpr<int_riscv_paaddu_b,    PAADDU_B>;
-def : PatGprGpr<int_riscv_paaddu_h,    PAADDU_H>;
-def : PatGprGpr<int_riscv_psub_b,      PSUB_B>;
-def : PatGprGpr<int_riscv_psub_h,      PSUB_H>;
-def : PatGprGpr<int_riscv_pssub_b,     PSSUB_B>;
-def : PatGprGpr<int_riscv_pssub_h,     PSSUB_H>;
-def : PatGprGpr<int_riscv_pasub_b,     PASUB_B>;
-def : PatGprGpr<int_riscv_pasub_h,     PASUB_H>;
-def : PatGprGpr<int_riscv_pssubu_b,    PSSUBU_B>;
-def : PatGprGpr<int_riscv_pssubu_h,    PSSUBU_H>;
-def : PatGprGpr<int_riscv_pasubu_b,    PASUBU_B>;
-def : PatGprGpr<int_riscv_pasubu_h,    PASUBU_H>;
-def : PatGprGpr<int_riscv_pdif_b,      PDIF_B>;
-def : PatGprGpr<int_riscv_pdif_h,      PDIF_H>;
-def : PatGprGpr<int_riscv_pdifu_b,     PDIFU_B>;
-def : PatGprGpr<int_riscv_pdifu_h,     PDIFU_H>;
-def : PatGprGpr<int_riscv_slx,         SLX>;
-def : PatGprGpr<int_riscv_pmul_h_b01,  PMUL_H_B01>;
-def : PatGprGpr<int_riscv_pmulu_h_b01, PMULU_H_B01>;
-def : PatGprGpr<int_riscv_psh1add_h,   PSH1ADD_H>;
-def : PatGprGpr<int_riscv_pssh1sadd_h, PSSH1SADD_H>;
+def : PatGprImm<int_riscv_pslli_b,      PSLLI_B,  uimm3>;
+def : PatGprImm<int_riscv_pslli_h,      PSLLI_H,  uimm4>;
+def : PatGprImm<int_riscv_psslai_h,     PSSLAI_H, uimm4>;
+def : PatGprGpr<int_riscv_psll_bs,      PSLL_BS>;
+def : PatGprGpr<int_riscv_psll_hs,      PSLL_HS>;
+def : PatGprGpr<int_riscv_padd_bs,      PADD_BS>;
+def : PatGprGpr<int_riscv_padd_hs,      PADD_HS>;
+def : PatGprGpr<int_riscv_pssha_hs,     PSSHA_HS>;
+def : PatGprGpr<int_riscv_psshar_hs,    PSSHAR_HS>;
+def : PatGprImm<int_riscv_psrli_b,      PSRLI_B,  uimm3>;
+def : PatGprImm<int_riscv_psrli_h,      PSRLI_H,  uimm4>;
+def : PatGprImm<int_riscv_pusati_h,     PUSATI_H, uimm4>;
+def : PatGprImm<int_riscv_psrai_b,      PSRAI_B,  uimm3>;
+def : PatGprImm<int_riscv_psrai_h,      PSRAI_H,  uimm4>;
+def : PatGprImm<int_riscv_psrari_h,     PSRARI_H, uimm4>;
+def : PatGprImm<int_riscv_psati_h,      PSATI_H,  uimm4>;
+def : PatGprGpr<int_riscv_psrl_bs,      PSRL_BS>;
+def : PatGprGpr<int_riscv_psrl_hs,      PSRL_HS>;
+def : PatGprGpr<int_riscv_predsum_bs,   PREDSUM_BS>;
+def : PatGprGpr<int_riscv_predsum_hs,   PREDSUM_HS>;
+def : PatGprGpr<int_riscv_predsumu_bs,  PREDSUMU_BS>;
+def : PatGprGpr<int_riscv_predsumu_hs,  PREDSUMU_HS>;
+def : PatGprGpr<int_riscv_psra_bs,      PSRA_BS>;
+def : PatGprGpr<int_riscv_psra_hs,      PSRA_HS>;
+def : PatGprGpr<int_riscv_padd_b,       PADD_B>;
+def : PatGprGpr<int_riscv_padd_h,       PADD_H>;
+def : PatGprGpr<int_riscv_psadd_b,      PSADD_B>;
+def : PatGprGpr<int_riscv_psadd_h,      PSADD_H>;
+def : PatGprGpr<int_riscv_paadd_b,      PAADD_B>;
+def : PatGprGpr<int_riscv_paadd_h,      PAADD_H>;
+def : PatGprGpr<int_riscv_psaddu_b,     PSADDU_B>;
+def : PatGprGpr<int_riscv_psaddu_h,     PSADDU_H>;
+def : PatGprGpr<int_riscv_paaddu_b,     PAADDU_B>;
+def : PatGprGpr<int_riscv_paaddu_h,     PAADDU_H>;
+def : PatGprGpr<int_riscv_psub_b,       PSUB_B>;
+def : PatGprGpr<int_riscv_psub_h,       PSUB_H>;
+def : PatGprGpr<int_riscv_pssub_b,      PSSUB_B>;
+def : PatGprGpr<int_riscv_pssub_h,      PSSUB_H>;
+def : PatGprGpr<int_riscv_pasub_b,      PASUB_B>;
+def : PatGprGpr<int_riscv_pasub_h,      PASUB_H>;
+def : PatGprGpr<int_riscv_pssubu_b,     PSSUBU_B>;
+def : PatGprGpr<int_riscv_pssubu_h,     PSSUBU_H>;
+def : PatGprGpr<int_riscv_pasubu_b,     PASUBU_B>;
+def : PatGprGpr<int_riscv_pasubu_h,     PASUBU_H>;
+def : PatGprGpr<int_riscv_pdif_b,       PDIF_B>;
+def : PatGprGpr<int_riscv_pdif_h,       PDIF_H>;
+def : PatGprGpr<int_riscv_pdifu_b,      PDIFU_B>;
+def : PatGprGpr<int_riscv_pdifu_h,      PDIFU_H>;
+def : PatGprGpr<int_riscv_slx,          SLX>;
+def : PatGprGpr<int_riscv_pmul_h_b01,   PMUL_H_B01>;
+def : PatGprGpr<int_riscv_pmulu_h_b01,  PMULU_H_B01>;
+def : PatGprGpr<int_riscv_psh1add_h,    PSH1ADD_H>;
+def : PatGprGpr<int_riscv_pssh1sadd_h,  PSSH1SADD_H>;
+def : PatGprGpr<int_riscv_pmul_h_b00,   PMUL_H_B00>;
+def : PatGprGpr<int_riscv_pmul_h_b11,   PMUL_H_B11>;
+def : PatGprGpr<int_riscv_pmulu_h_b00,  PMULU_H_B00>;
+def : PatGprGpr<int_riscv_pmulu_h_b11,  PMULU_H_B11>;
+def : PatGprGpr<int_riscv_pmulsu_h_b00, PMULSU_H_B00>;
+def : PatGprGpr<int_riscv_pmulsu_h_b11, PMULSU_H_B11>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
-def : PatGprImm<int_riscv_sslai,    SSLAI, uimm5>;
-def : PatGprGpr<int_riscv_ssha,     SSHA>;
-def : PatGprGpr<int_riscv_sshar,    SSHAR>;
-def : PatGprImm<int_riscv_usati,    USATI_RV32, uimm5>;
-def : PatGprImm<int_riscv_srari,    SRARI_RV32, uimm5>;
-def : PatGprImm<int_riscv_sati,     SATI_RV32,  uimm5>;
-def : PatGprGpr<int_riscv_sadd,     SADD>;
-def : PatGprGpr<int_riscv_aadd,     AADD>;
-def : PatGprGpr<int_riscv_saddu,    SADDU>;
-def : PatGprGpr<int_riscv_aaddu,    AADDU>;
-def : PatGprGpr<int_riscv_ssub,     SSUB>;
-def : PatGprGpr<int_riscv_asub,     ASUB>;
-def : PatGprGpr<int_riscv_ssubu,    SSUBU>;
-def : PatGprGpr<int_riscv_asubu,    ASUBU>;
-def : PatGprGpr<int_riscv_mul_h01,  MUL_H01>;
-def : PatGprGpr<int_riscv_mulu_h01, MULU_H01>;
-def : PatGprGpr<int_riscv_ssh1sadd, SSH1SADD>;
+def : PatGprImm<int_riscv_sslai,     SSLAI, uimm5>;
+def : PatGprGpr<int_riscv_ssha,      SSHA>;
+def : PatGprGpr<int_riscv_sshar,     SSHAR>;
+def : PatGprImm<int_riscv_usati,     USATI_RV32, uimm5>;
+def : PatGprImm<int_riscv_srari,     SRARI_RV32, uimm5>;
+def : PatGprImm<int_riscv_sati,      SATI_RV32,  uimm5>;
+def : PatGprGpr<int_riscv_sadd,      SADD>;
+def : PatGprGpr<int_riscv_aadd,      AADD>;
+def : PatGprGpr<int_riscv_saddu,     SADDU>;
+def : PatGprGpr<int_riscv_aaddu,     AADDU>;
+def : PatGprGpr<int_riscv_ssub,      SSUB>;
+def : PatGprGpr<int_riscv_asub,      ASUB>;
+def : PatGprGpr<int_riscv_ssubu,     SSUBU>;
+def : PatGprGpr<int_riscv_asubu,     ASUBU>;
+def : PatGprGpr<int_riscv_mul_h01,   MUL_H01>;
+def : PatGprGpr<int_riscv_mulu_h01,  MULU_H01>;
+def : PatGprGpr<int_riscv_ssh1sadd,  SSH1SADD>;
+def : PatGprGpr<int_riscv_mul_h00,   MUL_H00>;
+def : PatGprGpr<int_riscv_mul_h11,   MUL_H11>;
+def : PatGprGpr<int_riscv_mulu_h00,  MULU_H00>;
+def : PatGprGpr<int_riscv_mulu_h11,  MULU_H11>;
+def : PatGprGpr<int_riscv_mulsu_h00, MULSU_H00>;
+def : PatGprGpr<int_riscv_mulsu_h11, MULSU_H11>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
-def : PatGprImm<int_riscv_pslli_w,     PSLLI_W,  uimm5>;
-def : PatGprImm<int_riscv_psslai_w,    PSSLAI_W, uimm5>;
-def : PatGprGpr<int_riscv_psll_ws,     PSLL_WS>;
-def : PatGprGpr<int_riscv_padd_ws,     PADD_WS>;
-def : PatGprGpr<int_riscv_pssha_ws,    PSSHA_WS>;
-def : PatGprGpr<int_riscv_sha,         SHA>;
-def : PatGprGpr<int_riscv_psshar_ws,   PSSHAR_WS>;
-def : PatGprGpr<int_riscv_shar,        SHAR>;
-def : PatGprImm<int_riscv_psrli_w,     PSRLI_W,  uimm5>;
-def : PatGprImm<int_riscv_pusati_w,    PUSATI_W, uimm5>;
-def : PatGprImm<int_riscv_usati,       USATI_RV64, uimm6>;
-def : PatGprImm<int_riscv_psrai_w,     PSRAI_W,    uimm5>;
-def : PatGprImm<int_riscv_psrari_w,    PSRARI_W,   uimm5>;
-def : PatGprImm<int_riscv_srari,       SRARI_RV64, uimm6>;
-def : PatGprImm<int_riscv_psati_w,     PSATI_W,    uimm5>;
-def : PatGprImm<int_riscv_sati,        SATI_RV64,  uimm6>;
-def : PatGprGpr<int_riscv_psrl_ws,     PSRL_WS>;
-def : PatGprGpr<int_riscv_predsum_ws,  PREDSUM_WS>;
-def : PatGprGpr<int_riscv_predsumu_ws, PREDSUMU_WS>;
-def : PatGprGpr<int_riscv_psra_ws,     PSRA_WS>;
-def : PatGprGpr<int_riscv_padd_w,      PADD_W>;
-def : PatGprGpr<int_riscv_psadd_w,     PSADD_W>;
-def : PatGprGpr<int_riscv_paadd_w,     PAADD_W>;
-def : PatGprGpr<int_riscv_psaddu_w,    PSADDU_W>;
-def : PatGprGpr<int_riscv_paaddu_w,    PAADDU_W>;
-def : PatGprGpr<int_riscv_psub_w,      PSUB_W>;
-def : PatGprGpr<int_riscv_pssub_w,     PSSUB_W>;
-def : PatGprGpr<int_riscv_pasub_w,     PASUB_W>;
-def : PatGprGpr<int_riscv_pssubu_w,    PSSUBU_W>;
-def : PatGprGpr<int_riscv_pasubu_w,    PASUBU_W>;
-def : PatGprGpr<int_riscv_pmul_w_h01,  PMUL_W_H01>;
-def : PatGprGpr<int_riscv_pmulu_w_h01, PMULU_W_H01>;
-def : PatGprGpr<int_riscv_mul_w01,     MUL_W01>;
-def : PatGprGpr<int_riscv_mulu_w01,    MULU_W01>;
-def : PatGprGpr<int_riscv_psh1add_w,   PSH1ADD_W>;
-def : PatGprGpr<int_riscv_pssh1sadd_w, PSSH1SADD_W>;
-def : PatGprGpr<int_riscv_unzip8p,     UNZIP8P>;
-def : PatGprGpr<int_riscv_unzip16p,    UNZIP16P>;
-def : PatGprGpr<int_riscv_unzip8hp,    UNZIP8HP>;
-def : PatGprGpr<int_riscv_unzip16hp,   UNZIP16HP>;
-def : PatGprGpr<int_riscv_zip8p,       ZIP8P>;
-def : PatGprGpr<int_riscv_zip16p,      ZIP16P>;
-def : PatGprGpr<int_riscv_zip8hp,      ZIP8HP>;
-def : PatGprGpr<int_riscv_zip16hp,     ZIP16HP>;
+def : PatGprImm<int_riscv_pslli_w,      PSLLI_W,  uimm5>;
+def : PatGprImm<int_riscv_psslai_w,     PSSLAI_W, uimm5>;
+def : PatGprGpr<int_riscv_psll_ws,      PSLL_WS>;
+def : PatGprGpr<int_riscv_padd_ws,      PADD_WS>;
+def : PatGprGpr<int_riscv_pssha_ws,     PSSHA_WS>;
+def : PatGprGpr<int_riscv_sha,          SHA>;
+def : PatGprGpr<int_riscv_psshar_ws,    PSSHAR_WS>;
+def : PatGprGpr<int_riscv_shar,         SHAR>;
+def : PatGprImm<int_riscv_psrli_w,      PSRLI_W,  uimm5>;
+def : PatGprImm<int_riscv_pusati_w,     PUSATI_W, uimm5>;
+def : PatGprImm<int_riscv_usati,        USATI_RV64, uimm6>;
+def : PatGprImm<int_riscv_psrai_w,      PSRAI_W,    uimm5>;
+def : PatGprImm<int_riscv_psrari_w,     PSRARI_W,   uimm5>;
+def : PatGprImm<int_riscv_srari,        SRARI_RV64, uimm6>;
+def : PatGprImm<int_riscv_psati_w,      PSATI_W,    uimm5>;
+def : PatGprImm<int_riscv_sati,         SATI_RV64,  uimm6>;
+def : PatGprGpr<int_riscv_psrl_ws,      PSRL_WS>;
+def : PatGprGpr<int_riscv_predsum_ws,   PREDSUM_WS>;
+def : PatGprGpr<int_riscv_predsumu_ws,  PREDSUMU_WS>;
+def : PatGprGpr<int_riscv_psra_ws,      PSRA_WS>;
+def : PatGprGpr<int_riscv_padd_w,       PADD_W>;
+def : PatGprGpr<int_riscv_psadd_w,      PSADD_W>;
+def : PatGprGpr<int_riscv_paadd_w,      PAADD_W>;
+def : PatGprGpr<int_riscv_psaddu_w,     PSADDU_W>;
+def : PatGprGpr<int_riscv_paaddu_w,     PAADDU_W>;
+def : PatGprGpr<int_riscv_psub_w,       PSUB_W>;
+def : PatGprGpr<int_riscv_pssub_w,      PSSUB_W>;
+def : PatGprGpr<int_riscv_pasub_w,      PASUB_W>;
+def : PatGprGpr<int_riscv_pssubu_w,     PSSUBU_W>;
+def : PatGprGpr<int_riscv_pasubu_w,     PASUBU_W>;
+def : PatGprGpr<int_riscv_pmul_w_h01,   PMUL_W_H01>;
+def : PatGprGpr<int_riscv_pmulu_w_h01,  PMULU_W_H01>;
+def : PatGprGpr<int_riscv_mul_w01,      MUL_W01>;
+def : PatGprGpr<int_riscv_mulu_w01,     MULU_W01>;
+def : PatGprGpr<int_riscv_psh1add_w,    PSH1ADD_W>;
+def : PatGprGpr<int_riscv_pssh1sadd_w,  PSSH1SADD_W>;
+def : PatGprGpr<int_riscv_unzip8p,      UNZIP8P>;
+def : PatGprGpr<int_riscv_unzip16p,     UNZIP16P>;
+def : PatGprGpr<int_riscv_unzip8hp,     UNZIP8HP>;
+def : PatGprGpr<int_riscv_unzip16hp,    UNZIP16HP>;
+def : PatGprGpr<int_riscv_zip8p,        ZIP8P>;
+def : PatGprGpr<int_riscv_zip16p,       ZIP16P>;
+def : PatGprGpr<int_riscv_zip8hp,       ZIP8HP>;
+def : PatGprGpr<int_riscv_zip16hp,      ZIP16HP>;
+def : PatGprGpr<int_riscv_pmul_w_h00,   PMUL_W_H00>;
+def : PatGprGpr<int_riscv_pmul_w_h11,   PMUL_W_H11>;
+def : PatGprGpr<int_riscv_pmulu_w_h00,  PMULU_W_H00>;
+def : PatGprGpr<int_riscv_pmulu_w_h11,  PMULU_W_H11>;
+def : PatGprGpr<int_riscv_pmulsu_w_h00, PMULSU_W_H00>;
+def : PatGprGpr<int_riscv_pmulsu_w_h11, PMULSU_W_H11>;
+def : PatGprGpr<int_riscv_mul_w00,      MUL_W00>;
+def : PatGprGpr<int_riscv_mul_w11,      MUL_W11>;
+def : PatGprGpr<int_riscv_mulu_w00,     MULU_W00>;
+def : PatGprGpr<int_riscv_mulu_w11,     MULU_W11>;
+def : PatGprGpr<int_riscv_mulsu_w00,    MULSU_W00>;
+def : PatGprGpr<int_riscv_mulsu_w11,    MULSU_W11>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 099b56776c95d..2b70414f8a353 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -771,3 +771,135 @@ define i32 @pssh1sadd_h(i32 %a, i32 %b) {
     %tmp = call i32 @llvm.riscv.pssh1sadd.h.i32(i32 %a, i32 %b)
     ret i32 %tmp
 }
+
+declare i32 @llvm.riscv.pmul.h.b00.i32.i32(i32, i32)
+
+define i32 @pmul_h_b00(i32 %a, i32 %b) {
+; RV32P-LABEL: pmul_h_b00:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmul.h.b00 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmul.h.b00.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmul.h.b11.i32.i32(i32, i32)
+
+define i32 @pmul_h_b11(i32 %a, i32 %b) {
+; RV32P-LABEL: pmul_h_b11:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmul.h.b11 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmul.h.b11.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulu.h.b00.i32.i32(i32, i32)
+
+define i32 @pmulu_h_b00(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulu_h_b00:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulu.h.b00 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulu.h.b00.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulu.h.b11.i32.i32(i32, i32)
+
+define i32 @pmulu_h_b11(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulu_h_b11:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulu.h.b11 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulu.h.b11.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulsu.h.b00.i32.i32.i32(i32, i32)
+
+define i32 @pmulsu_h_b00(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulsu_h_b00:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulsu.h.b00 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulsu.h.b00.i32.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulsu.h.b11.i32.i32.i32(i32, i32)
+
+define i32 @pmulsu_h_b11(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulsu_h_b11:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulsu.h.b11 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulsu.h.b11.i32.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mul.h00.i32.i32(i32, i32)
+
+define i32 @mul_h00(i32 %a, i32 %b) {
+; RV32P-LABEL: mul_h00:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mul.h00 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mul.h00.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mul.h11.i32.i32(i32, i32)
+
+define i32 @mul_h11(i32 %a, i32 %b) {
+; RV32P-LABEL: mul_h11:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mul.h11 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mul.h11.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulu.h00.i32.i32(i32, i32)
+
+define i32 @mulu_h00(i32 %a, i32 %b) {
+; RV32P-LABEL: mulu_h00:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulu.h00 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulu.h00.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulu.h11.i32.i32(i32, i32)
+
+define i32 @mulu_h11(i32 %a, i32 %b) {
+; RV32P-LABEL: mulu_h11:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulu.h11 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulu.h11.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulsu.h00.i32.i32.i32(i32, i32)
+
+define i32 @mulsu_h00(i32 %a, i32 %b) {
+; RV32P-LABEL: mulsu_h00:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulsu.h00 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulsu.h00.i32.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulsu.h11.i32.i32.i32(i32, i32)
+
+define i32 @mulsu_h11(i32 %a, i32 %b) {
+; RV32P-LABEL: mulsu_h11:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulsu.h11 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulsu.h11.i32.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index c00611d29aa0c..01d57455df930 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -1068,3 +1068,201 @@ define i64 @zip16hp(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.zip16hp.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.pmul.h.b00.i64.i64(i64, i64)
+
+define i64 @pmul_h_b00(i64 %a, i64 %b) {
+; RV64P-LABEL: pmul_h_b00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmul.h.b00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmul.h.b00.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmul.w.h00.i64.i64(i64, i64)
+
+define i64 @pmul_w_h00(i64 %a, i64 %b) {
+; RV64P-LABEL: pmul_w_h00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmul.w.h00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmul.w.h00.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmul.h.b11.i64.i64(i64, i64)
+
+define i64 @pmul_h_b11(i64 %a, i64 %b) {
+; RV64P-LABEL: pmul_h_b11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmul.h.b11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmul.h.b11.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmul.w.h11.i64.i64(i64, i64)
+
+define i64 @pmul_w_h11(i64 %a, i64 %b) {
+; RV64P-LABEL: pmul_w_h11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmul.w.h11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmul.w.h11.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulu.h.b00.i64.i64(i64, i64)
+
+define i64 @pmulu_h_b00(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulu_h_b00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulu.h.b00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulu.h.b00.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulu.w.h00.i64.i64(i64, i64)
+
+define i64 @pmulu_w_h00(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulu_w_h00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulu.w.h00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulu.w.h00.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulu.h.b11.i64.i64(i64, i64)
+
+define i64 @pmulu_h_b11(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulu_h_b11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulu.h.b11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulu.h.b11.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulu.w.h11.i64.i64(i64, i64)
+
+define i64 @pmulu_w_h11(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulu_w_h11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulu.w.h11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulu.w.h11.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulsu.h.b00.i64.i64.i64(i64, i64)
+
+define i64 @pmulsu_h_b00(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulsu_h_b00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulsu.h.b00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulsu.h.b00.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulsu.w.h00.i64.i64.i64(i64, i64)
+
+define i64 @pmulsu_w_h00(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulsu_w_h00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulsu.w.h00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulsu.w.h00.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulsu.h.b11.i64.i64.i64(i64, i64)
+
+define i64 @pmulsu_h_b11(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulsu_h_b11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulsu.h.b11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulsu.h.b11.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulsu.w.h11.i64.i64.i64(i64, i64)
+
+define i64 @pmulsu_w_h11(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulsu_w_h11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulsu.w.h11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulsu.w.h11.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.mul.w00.i64.i64(i64, i64)
+
+define i64 @mul_w00(i64 %a, i64 %b) {
+; RV64P-LABEL: mul_w00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    mul.w00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.mul.w00.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.mul.w11.i64.i64(i64, i64)
+
+define i64 @mul_w11(i64 %a, i64 %b) {
+; RV64P-LABEL: mul_w11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    mul.w11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.mul.w11.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.mulu.w00.i64.i64(i64, i64)
+
+define i64 @mulu_w00(i64 %a, i64 %b) {
+; RV64P-LABEL: mulu_w00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    mulu.w00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.mulu.w00.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.mulu.w11.i64.i64(i64, i64)
+
+define i64 @mulu_w11(i64 %a, i64 %b) {
+; RV64P-LABEL: mulu_w11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    mulu.w11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.mulu.w11.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.mulsu.w00.i64.i64.i64(i64, i64)
+
+define i64 @mulsu_w00(i64 %a, i64 %b) {
+; RV64P-LABEL: mulsu_w00:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    mulsu.w00 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.mulsu.w00.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.mulsu.w11.i64.i64.i64(i64, i64)
+
+define i64 @mulsu_w11(i64 %a, i64 %b) {
+; RV64P-LABEL: mulsu_w11:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    mulsu.w11 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.mulsu.w11.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From d9eff319bdec0062203f3bbd856469b35b8dd710 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 17:25:05 +0800
Subject: [PATCH 19/40] [RISCV] Packed and Reordered Pack LLVM IR Intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  11 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  14 ++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll |  77 +++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 121 ++++++++++++++++++
 4 files changed, 223 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 793688782c271..34c2fbd37f04f 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1996,6 +1996,17 @@ let TargetPrefix = "riscv" in {
   defm zip16p      : RVPBinaryIntrinsics;
   defm zip8hp      : RVPBinaryIntrinsics;
   defm zip16hp     : RVPBinaryIntrinsics;
+  defm ppack_h     : RVPBinaryIntrinsics;
+  defm ppack_w     : RVPBinaryIntrinsics;
+  defm ppackbt_h   : RVPBinaryIntrinsics;
+  defm ppackbt_w   : RVPBinaryIntrinsics;
+  defm packbt      : RVPBinaryIntrinsics;
+  defm ppacktb_h   : RVPBinaryIntrinsics;
+  defm ppacktb_w   : RVPBinaryIntrinsics;
+  defm packtb      : RVPBinaryIntrinsics;
+  defm ppackt_h    : RVPBinaryIntrinsics;
+  defm ppackt_w    : RVPBinaryIntrinsics;
+  defm packt       : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 85720047bd1a7..81f8b5787d36f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -954,6 +954,10 @@ def : PatGprGpr<int_riscv_pmulu_h_b00,  PMULU_H_B00>;
 def : PatGprGpr<int_riscv_pmulu_h_b11,  PMULU_H_B11>;
 def : PatGprGpr<int_riscv_pmulsu_h_b00, PMULSU_H_B00>;
 def : PatGprGpr<int_riscv_pmulsu_h_b11, PMULSU_H_B11>;
+def : PatGprGpr<int_riscv_ppack_h,      PPACK_H>;
+def : PatGprGpr<int_riscv_ppackbt_h,    PPACKBT_H>;
+def : PatGprGpr<int_riscv_ppacktb_h,    PPACKTB_H>;
+def : PatGprGpr<int_riscv_ppackt_h,     PPACKT_H>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -980,6 +984,9 @@ def : PatGprGpr<int_riscv_mulu_h00,  MULU_H00>;
 def : PatGprGpr<int_riscv_mulu_h11,  MULU_H11>;
 def : PatGprGpr<int_riscv_mulsu_h00, MULSU_H00>;
 def : PatGprGpr<int_riscv_mulsu_h11, MULSU_H11>;
+def : PatGprGpr<int_riscv_packbt,    PACKBT_RV32>;
+def : PatGprGpr<int_riscv_packtb,    PACKTB_RV32>;
+def : PatGprGpr<int_riscv_packt,     PACKT_RV32>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
@@ -1039,4 +1046,11 @@ def : PatGprGpr<int_riscv_mulu_w00,     MULU_W00>;
 def : PatGprGpr<int_riscv_mulu_w11,     MULU_W11>;
 def : PatGprGpr<int_riscv_mulsu_w00,    MULSU_W00>;
 def : PatGprGpr<int_riscv_mulsu_w11,    MULSU_W11>;
+def : PatGprGpr<int_riscv_ppack_w,      PPACK_W>;
+def : PatGprGpr<int_riscv_ppackbt_w,    PPACKBT_W>;
+def : PatGprGpr<int_riscv_packbt,       PACKBT_RV64>;
+def : PatGprGpr<int_riscv_ppacktb_w,    PPACKTB_W>;
+def : PatGprGpr<int_riscv_packtb,       PACKTB_RV64>;
+def : PatGprGpr<int_riscv_ppackt_w,     PPACKT_W>;
+def : PatGprGpr<int_riscv_packt,        PACKT_RV64>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 2b70414f8a353..31e339c74deea 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -903,3 +903,80 @@ define i32 @mulsu_h11(i32 %a, i32 %b) {
     %tmp = call i32 @llvm.riscv.mulsu.h11.i32.i32.i32(i32 %a, i32 %b)
     ret i32 %tmp
 }
+
+declare i32 @llvm.riscv.ppack.h.i32(i32, i32)
+
+define i32 @ppack_h(i32 %a, i32 %b) {
+; RV32P-LABEL: ppack_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    ppack.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.ppack.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.ppackbt.h.i32(i32, i32)
+
+define i32 @ppackbt_h(i32 %a, i32 %b) {
+; RV32P-LABEL: ppackbt_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    ppackbt.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.ppackbt.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.packbt.i32(i32, i32)
+
+define i32 @packbt(i32 %a, i32 %b) {
+; RV32P-LABEL: packbt:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    packbt a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.packbt.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.ppacktb.h.i32(i32, i32)
+
+define i32 @ppacktb_h(i32 %a, i32 %b) {
+; RV32P-LABEL: ppacktb_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    ppacktb.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.ppacktb.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.packtb.i32(i32, i32)
+
+define i32 @packtb(i32 %a, i32 %b) {
+; RV32P-LABEL: packtb:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    packtb a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.packtb.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.ppackt.h.i32(i32, i32)
+
+define i32 @ppackt_h(i32 %a, i32 %b) {
+; RV32P-LABEL: ppackt_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    ppackt.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.ppackt.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.packt.i32(i32, i32)
+
+define i32 @packt(i32 %a, i32 %b) {
+; RV32P-LABEL: packt:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    packt a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.packt.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 01d57455df930..dc7aecb4960a2 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -1266,3 +1266,124 @@ define i64 @mulsu_w11(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.mulsu.w11.i64.i64.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.ppack.h.i64(i64, i64)
+
+define i64 @ppack_h(i64 %a, i64 %b) {
+; RV64P-LABEL: ppack_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    ppack.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.ppack.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.ppack.w.i64(i64, i64)
+
+define i64 @ppack_w(i64 %a, i64 %b) {
+; RV64P-LABEL: ppack_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    ppack.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.ppack.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.ppackbt.h.i64(i64, i64)
+
+define i64 @ppackbt_h(i64 %a, i64 %b) {
+; RV64P-LABEL: ppackbt_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    ppackbt.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.ppackbt.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.ppackbt.w.i64(i64, i64)
+
+define i64 @ppackbt_w(i64 %a, i64 %b) {
+; RV64P-LABEL: ppackbt_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    ppackbt.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.ppackbt.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.packbt.i64(i64, i64)
+
+define i64 @packbt(i64 %a, i64 %b) {
+; RV64P-LABEL: packbt:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    packbt a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.packbt.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.ppacktb.h.i64(i64, i64)
+
+define i64 @ppacktb_h(i64 %a, i64 %b) {
+; RV64P-LABEL: ppacktb_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    ppacktb.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.ppacktb.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.ppacktb.w.i64(i64, i64)
+
+define i64 @ppacktb_w(i64 %a, i64 %b) {
+; RV64P-LABEL: ppacktb_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    ppacktb.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.ppacktb.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.packtb.i64(i64, i64)
+
+define i64 @packtb(i64 %a, i64 %b) {
+; RV64P-LABEL: packtb:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    packtb a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.packtb.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.ppackt.h.i64(i64, i64)
+
+define i64 @ppackt_h(i64 %a, i64 %b) {
+; RV64P-LABEL: ppackt_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    ppackt.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.ppackt.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.ppackt.w.i64(i64, i64)
+
+define i64 @ppackt_w(i64 %a, i64 %b) {
+; RV64P-LABEL: ppackt_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    ppackt.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.ppackt.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.packt.i64(i64, i64)
+
+define i64 @packt(i64 %a, i64 %b) {
+; RV64P-LABEL: packt:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    packt a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.packt.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From b31098f3fd086dc5e39d8f3a0b310d4dc51ccd47 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 17:53:01 +0800
Subject: [PATCH 20/40] [RISCV] Add some P-ext LLVM IR intrinsics

Cross-Lane Packed Add/Sub Intrinsics
Packed Comparison and Min/Max Intrinsics
---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  36 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  36 ++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 253 ++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 363 ++++++++++++++++++
 4 files changed, 688 insertions(+)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 34c2fbd37f04f..0eaf4af8e4c79 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -2007,6 +2007,42 @@ let TargetPrefix = "riscv" in {
   defm ppackt_h    : RVPBinaryIntrinsics;
   defm ppackt_w    : RVPBinaryIntrinsics;
   defm packt       : RVPBinaryIntrinsics;
+  defm pas_hx      : RVPBinaryIntrinsics;
+  defm pas_wx      : RVPBinaryIntrinsics;
+  defm psa_hx      : RVPBinaryIntrinsics;
+  defm psa_wx      : RVPBinaryIntrinsics;
+  defm psas_hx     : RVPBinaryIntrinsics;
+  defm psas_wx     : RVPBinaryIntrinsics;
+  defm pssa_hx     : RVPBinaryIntrinsics;
+  defm pssa_wx     : RVPBinaryIntrinsics;
+  defm paas_hx     : RVPBinaryIntrinsics;
+  defm paas_wx     : RVPBinaryIntrinsics;
+  defm pasa_hx     : RVPBinaryIntrinsics;
+  defm pasa_wx     : RVPBinaryIntrinsics;
+  defm mseq        : RVPBinaryIntrinsics;
+  defm pmseq_b     : RVPBinaryIntrinsics;
+  defm pmseq_h     : RVPBinaryIntrinsics;
+  defm pmseq_w     : RVPBinaryIntrinsics;
+  defm mslt        : RVPBinaryIntrinsics;
+  defm pmslt_b     : RVPBinaryIntrinsics;
+  defm pmslt_h     : RVPBinaryIntrinsics;
+  defm pmslt_w     : RVPBinaryIntrinsics;
+  defm msltu       : RVPBinaryIntrinsics;
+  defm pmsltu_b    : RVPBinaryIntrinsics;
+  defm pmsltu_h    : RVPBinaryIntrinsics;
+  defm pmsltu_w    : RVPBinaryIntrinsics;
+  defm pmin_b      : RVPBinaryIntrinsics;
+  defm pmin_h      : RVPBinaryIntrinsics;
+  defm pmin_w      : RVPBinaryIntrinsics;
+  defm pminu_b     : RVPBinaryIntrinsics;
+  defm pminu_h     : RVPBinaryIntrinsics;
+  defm pminu_w     : RVPBinaryIntrinsics;
+  defm pmax_b      : RVPBinaryIntrinsics;
+  defm pmax_h      : RVPBinaryIntrinsics;
+  defm pmax_w      : RVPBinaryIntrinsics;
+  defm pmaxu_b     : RVPBinaryIntrinsics;
+  defm pmaxu_h     : RVPBinaryIntrinsics;
+  defm pmaxu_w     : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 81f8b5787d36f..7b249581b0b93 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -958,6 +958,26 @@ def : PatGprGpr<int_riscv_ppack_h,      PPACK_H>;
 def : PatGprGpr<int_riscv_ppackbt_h,    PPACKBT_H>;
 def : PatGprGpr<int_riscv_ppacktb_h,    PPACKTB_H>;
 def : PatGprGpr<int_riscv_ppackt_h,     PPACKT_H>;
+def : PatGprGpr<int_riscv_pas_hx,       PAS_HX>;
+def : PatGprGpr<int_riscv_psa_hx,       PSA_HX>;
+def : PatGprGpr<int_riscv_psas_hx,      PSAS_HX>;
+def : PatGprGpr<int_riscv_pssa_hx,      PSSA_HX>;
+def : PatGprGpr<int_riscv_paas_hx,      PAAS_HX>;
+def : PatGprGpr<int_riscv_pasa_hx,      PASA_HX>;
+def : PatGprGpr<int_riscv_pmseq_b,      PMSEQ_B>;
+def : PatGprGpr<int_riscv_pmseq_h,      PMSEQ_H>;
+def : PatGprGpr<int_riscv_pmslt_b,      PMSLT_B>;
+def : PatGprGpr<int_riscv_pmslt_h,      PMSLT_H>;
+def : PatGprGpr<int_riscv_pmsltu_b,     PMSLTU_B>;
+def : PatGprGpr<int_riscv_pmsltu_h,     PMSLTU_H>;
+def : PatGprGpr<int_riscv_pmin_b,       PMIN_B>;
+def : PatGprGpr<int_riscv_pmin_h,       PMIN_H>;
+def : PatGprGpr<int_riscv_pminu_b,      PMINU_B>;
+def : PatGprGpr<int_riscv_pminu_h,      PMINU_H>;
+def : PatGprGpr<int_riscv_pmax_b,       PMAX_B>;
+def : PatGprGpr<int_riscv_pmax_h,       PMAX_H>;
+def : PatGprGpr<int_riscv_pmaxu_b,      PMAXU_B>;
+def : PatGprGpr<int_riscv_pmaxu_h,      PMAXU_H>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -987,6 +1007,9 @@ def : PatGprGpr<int_riscv_mulsu_h11, MULSU_H11>;
 def : PatGprGpr<int_riscv_packbt,    PACKBT_RV32>;
 def : PatGprGpr<int_riscv_packtb,    PACKTB_RV32>;
 def : PatGprGpr<int_riscv_packt,     PACKT_RV32>;
+def : PatGprGpr<int_riscv_mseq,      MSEQ>;
+def : PatGprGpr<int_riscv_mslt,      MSLT>;
+def : PatGprGpr<int_riscv_msltu,     MSLTU>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
@@ -1053,4 +1076,17 @@ def : PatGprGpr<int_riscv_ppacktb_w,    PPACKTB_W>;
 def : PatGprGpr<int_riscv_packtb,       PACKTB_RV64>;
 def : PatGprGpr<int_riscv_ppackt_w,     PPACKT_W>;
 def : PatGprGpr<int_riscv_packt,        PACKT_RV64>;
+def : PatGprGpr<int_riscv_pas_wx,       PAS_WX>;
+def : PatGprGpr<int_riscv_psa_wx,       PSA_WX>;
+def : PatGprGpr<int_riscv_psas_wx,      PSAS_WX>;
+def : PatGprGpr<int_riscv_pssa_wx,      PSSA_WX>;
+def : PatGprGpr<int_riscv_paas_wx,      PAAS_WX>;
+def : PatGprGpr<int_riscv_pasa_wx,      PASA_WX>;
+def : PatGprGpr<int_riscv_pmseq_w,      PMSEQ_W>;
+def : PatGprGpr<int_riscv_pmslt_w,      PMSLT_W>;
+def : PatGprGpr<int_riscv_pmsltu_w,     PMSLTU_W>;
+def : PatGprGpr<int_riscv_pmin_w,       PMIN_W>;
+def : PatGprGpr<int_riscv_pminu_w,      PMINU_W>;
+def : PatGprGpr<int_riscv_pmax_w,       PMAX_W>;
+def : PatGprGpr<int_riscv_pmaxu_w,      PMAXU_W>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 31e339c74deea..49073b5821dcb 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -980,3 +980,256 @@ define i32 @packt(i32 %a, i32 %b) {
     %tmp = call i32 @llvm.riscv.packt.i32(i32 %a, i32 %b)
     ret i32 %tmp
 }
+
+declare i32 @llvm.riscv.pas.hx.i32(i32, i32)
+
+define i32 @pas_hx(i32 %a, i32 %b) {
+; RV32P-LABEL: pas_hx:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pas.hx a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pas.hx.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psa.hx.i32(i32, i32)
+
+define i32 @psa_hx(i32 %a, i32 %b) {
+; RV32P-LABEL: psa_hx:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psa.hx a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psa.hx.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.psas.hx.i32(i32, i32)
+
+define i32 @psas_hx(i32 %a, i32 %b) {
+; RV32P-LABEL: psas_hx:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    psas.hx a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.psas.hx.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pssa.hx.i32(i32, i32)
+
+define i32 @pssa_hx(i32 %a, i32 %b) {
+; RV32P-LABEL: pssa_hx:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pssa.hx a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pssa.hx.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.paas.hx.i32(i32, i32)
+
+define i32 @paas_hx(i32 %a, i32 %b) {
+; RV32P-LABEL: paas_hx:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    paas.hx a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.paas.hx.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pasa.hx.i32(i32, i32)
+
+define i32 @pasa_hx(i32 %a, i32 %b) {
+; RV32P-LABEL: pasa_hx:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pasa.hx a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pasa.hx.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mseq.i32(i32, i32)
+
+define i32 @mseq(i32 %a, i32 %b) {
+; RV32P-LABEL: mseq:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mseq a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mseq.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmseq.b.i32(i32, i32)
+
+define i32 @pmseq_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pmseq_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmseq.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmseq.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmseq.h.i32(i32, i32)
+
+define i32 @pmseq_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmseq_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmseq.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmseq.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mslt.i32(i32, i32)
+
+define i32 @mslt(i32 %a, i32 %b) {
+; RV32P-LABEL: mslt:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mslt a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mslt.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmslt.b.i32(i32, i32)
+
+define i32 @pmslt_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pmslt_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmslt.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmslt.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmslt.h.i32(i32, i32)
+
+define i32 @pmslt_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmslt_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmslt.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmslt.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.msltu.i32(i32, i32)
+
+define i32 @msltu(i32 %a, i32 %b) {
+; RV32P-LABEL: msltu:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    msltu a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.msltu.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmsltu.b.i32(i32, i32)
+
+define i32 @pmsltu_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pmsltu_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmsltu.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmsltu.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmsltu.h.i32(i32, i32)
+
+define i32 @pmsltu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmsltu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmsltu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmsltu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmin.b.i32(i32, i32)
+
+define i32 @pmin_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pmin_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmin.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmin.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmin.h.i32(i32, i32)
+
+define i32 @pmin_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmin_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmin.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmin.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pminu.b.i32(i32, i32)
+
+define i32 @pminu_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pminu_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pminu.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pminu.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pminu.h.i32(i32, i32)
+
+define i32 @pminu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pminu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pminu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pminu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmax.b.i32(i32, i32)
+
+define i32 @pmax_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pmax_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmax.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmax.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmax.h.i32(i32, i32)
+
+define i32 @pmax_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmax_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmax.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmax.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmaxu.b.i32(i32, i32)
+
+define i32 @pmaxu_b(i32 %a, i32 %b) {
+; RV32P-LABEL: pmaxu_b:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmaxu.b a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmaxu.b.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmaxu.h.i32(i32, i32)
+
+define i32 @pmaxu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmaxu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmaxu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmaxu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index dc7aecb4960a2..5e6d242e9c860 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -1387,3 +1387,366 @@ define i64 @packt(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.packt.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.pas.hx.i64(i64, i64)
+
+define i64 @pas_hx(i64 %a, i64 %b) {
+; RV64P-LABEL: pas_hx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pas.hx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pas.hx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pas.wx.i64(i64, i64)
+
+define i64 @pas_wx(i64 %a, i64 %b) {
+; RV64P-LABEL: pas_wx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pas.wx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pas.wx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psa.hx.i64(i64, i64)
+
+define i64 @psa_hx(i64 %a, i64 %b) {
+; RV64P-LABEL: psa_hx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psa.hx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psa.hx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psa.wx.i64(i64, i64)
+
+define i64 @psa_wx(i64 %a, i64 %b) {
+; RV64P-LABEL: psa_wx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psa.wx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psa.wx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psas.hx.i64(i64, i64)
+
+define i64 @psas_hx(i64 %a, i64 %b) {
+; RV64P-LABEL: psas_hx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psas.hx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psas.hx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.psas.wx.i64(i64, i64)
+
+define i64 @psas_wx(i64 %a, i64 %b) {
+; RV64P-LABEL: psas_wx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    psas.wx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.psas.wx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssa.hx.i64(i64, i64)
+
+define i64 @pssa_hx(i64 %a, i64 %b) {
+; RV64P-LABEL: pssa_hx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssa.hx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssa.hx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pssa.wx.i64(i64, i64)
+
+define i64 @pssa_wx(i64 %a, i64 %b) {
+; RV64P-LABEL: pssa_wx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pssa.wx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pssa.wx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.paas.hx.i64(i64, i64)
+
+define i64 @paas_hx(i64 %a, i64 %b) {
+; RV64P-LABEL: paas_hx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    paas.hx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.paas.hx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.paas.wx.i64(i64, i64)
+
+define i64 @paas_wx(i64 %a, i64 %b) {
+; RV64P-LABEL: paas_wx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    paas.wx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.paas.wx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pasa.hx.i64(i64, i64)
+
+define i64 @pasa_hx(i64 %a, i64 %b) {
+; RV64P-LABEL: pasa_hx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pasa.hx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pasa.hx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pasa.wx.i64(i64, i64)
+
+define i64 @pasa_wx(i64 %a, i64 %b) {
+; RV64P-LABEL: pasa_wx:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pasa.wx a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pasa.wx.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmseq.b.i64(i64, i64)
+
+define i64 @pmseq_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pmseq_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmseq.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmseq.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmseq.h.i64(i64, i64)
+
+define i64 @pmseq_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmseq_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmseq.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmseq.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmseq.w.i64(i64, i64)
+
+define i64 @pmseq_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmseq_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmseq.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmseq.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmslt.b.i64(i64, i64)
+
+define i64 @pmslt_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pmslt_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmslt.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmslt.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmslt.h.i64(i64, i64)
+
+define i64 @pmslt_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmslt_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmslt.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmslt.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmslt.w.i64(i64, i64)
+
+define i64 @pmslt_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmslt_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmslt.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmslt.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmsltu.b.i64(i64, i64)
+
+define i64 @pmsltu_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pmsltu_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmsltu.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmsltu.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmsltu.h.i64(i64, i64)
+
+define i64 @pmsltu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmsltu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmsltu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmsltu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmsltu.w.i64(i64, i64)
+
+define i64 @pmsltu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmsltu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmsltu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmsltu.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmin.b.i64(i64, i64)
+
+define i64 @pmin_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pmin_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmin.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmin.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmin.h.i64(i64, i64)
+
+define i64 @pmin_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmin_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmin.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmin.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmin.w.i64(i64, i64)
+
+define i64 @pmin_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmin_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmin.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmin.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pminu.b.i64(i64, i64)
+
+define i64 @pminu_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pminu_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pminu.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pminu.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pminu.h.i64(i64, i64)
+
+define i64 @pminu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pminu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pminu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pminu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pminu.w.i64(i64, i64)
+
+define i64 @pminu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pminu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pminu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pminu.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmax.b.i64(i64, i64)
+
+define i64 @pmax_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pmax_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmax.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmax.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmax.h.i64(i64, i64)
+
+define i64 @pmax_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmax_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmax.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmax.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmax.w.i64(i64, i64)
+
+define i64 @pmax_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmax_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmax.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmax.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmaxu.b.i64(i64, i64)
+
+define i64 @pmaxu_b(i64 %a, i64 %b) {
+; RV64P-LABEL: pmaxu_b:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmaxu.b a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmaxu.b.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmaxu.h.i64(i64, i64)
+
+define i64 @pmaxu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmaxu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmaxu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmaxu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmaxu.w.i64(i64, i64)
+
+define i64 @pmaxu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmaxu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmaxu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmaxu.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From 4dfba527863c574955ab574db223d00b56ffcc23 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Sep 2025 18:00:01 +0800
Subject: [PATCH 21/40] [RISCV] Packed High-half Multiply and Accumulate LLVM
 IR Intrinsics

---
 llvm/include/llvm/IR/IntrinsicsRISCV.td       |  69 ++++--
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td      |  27 +++
 llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll | 187 +++++++++++++++
 llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll | 220 ++++++++++++++++++
 4 files changed, 482 insertions(+), 21 deletions(-)

diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 0eaf4af8e4c79..99a53f33dc872 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -2043,6 +2043,18 @@ let TargetPrefix = "riscv" in {
   defm pmaxu_b     : RVPBinaryIntrinsics;
   defm pmaxu_h     : RVPBinaryIntrinsics;
   defm pmaxu_w     : RVPBinaryIntrinsics;
+  defm pmulh_h     : RVPBinaryIntrinsics;
+  defm pmulh_w     : RVPBinaryIntrinsics;
+  defm pmulhu_h    : RVPBinaryIntrinsics;
+  defm pmulhu_w    : RVPBinaryIntrinsics;
+  defm pmulhr_h    : RVPBinaryIntrinsics;
+  defm pmulhr_w    : RVPBinaryIntrinsics;
+  defm pmulhru_h   : RVPBinaryIntrinsics;
+  defm pmulhru_w   : RVPBinaryIntrinsics;
+  defm mulh_h1     : RVPBinaryIntrinsics;
+  defm mulhr       : RVPBinaryIntrinsics;
+  defm mulhru      : RVPBinaryIntrinsics;
+  defm mulh_h0     : RVPBinaryIntrinsics;
 
   class RVPBinaryAABIntrinsics
       : Intrinsic<[llvm_any_ty],
@@ -2053,27 +2065,32 @@ let TargetPrefix = "riscv" in {
     def "int_riscv_" # NAME   : RVPBinaryAABIntrinsics;
   }
 
-  defm pslli_b  : RVPBinaryAABIntrinsics;
-  defm pslli_h  : RVPBinaryAABIntrinsics;
-  defm pslli_w  : RVPBinaryAABIntrinsics;
-  defm psslai_h : RVPBinaryAABIntrinsics;
-  defm psslai_w : RVPBinaryAABIntrinsics;
-  defm sslai    : RVPBinaryAABIntrinsics;
-  defm psrli_b  : RVPBinaryAABIntrinsics;
-  defm psrli_h  : RVPBinaryAABIntrinsics;
-  defm psrli_w  : RVPBinaryAABIntrinsics;
-  defm pusati_h : RVPBinaryAABIntrinsics;
-  defm pusati_w : RVPBinaryAABIntrinsics;
-  defm usati    : RVPBinaryAABIntrinsics;
-  defm psrai_b  : RVPBinaryAABIntrinsics;
-  defm psrai_h  : RVPBinaryAABIntrinsics;
-  defm psrai_w  : RVPBinaryAABIntrinsics;
-  defm psrari_h : RVPBinaryAABIntrinsics;
-  defm psrari_w : RVPBinaryAABIntrinsics;
-  defm srari    : RVPBinaryAABIntrinsics;
-  defm psati_h  : RVPBinaryAABIntrinsics;
-  defm psati_w  : RVPBinaryAABIntrinsics;
-  defm sati     : RVPBinaryAABIntrinsics;
+  defm pslli_b    : RVPBinaryAABIntrinsics;
+  defm pslli_h    : RVPBinaryAABIntrinsics;
+  defm pslli_w    : RVPBinaryAABIntrinsics;
+  defm psslai_h   : RVPBinaryAABIntrinsics;
+  defm psslai_w   : RVPBinaryAABIntrinsics;
+  defm sslai      : RVPBinaryAABIntrinsics;
+  defm psrli_b    : RVPBinaryAABIntrinsics;
+  defm psrli_h    : RVPBinaryAABIntrinsics;
+  defm psrli_w    : RVPBinaryAABIntrinsics;
+  defm pusati_h   : RVPBinaryAABIntrinsics;
+  defm pusati_w   : RVPBinaryAABIntrinsics;
+  defm usati      : RVPBinaryAABIntrinsics;
+  defm psrai_b    : RVPBinaryAABIntrinsics;
+  defm psrai_h    : RVPBinaryAABIntrinsics;
+  defm psrai_w    : RVPBinaryAABIntrinsics;
+  defm psrari_h   : RVPBinaryAABIntrinsics;
+  defm psrari_w   : RVPBinaryAABIntrinsics;
+  defm srari      : RVPBinaryAABIntrinsics;
+  defm psati_h    : RVPBinaryAABIntrinsics;
+  defm psati_w    : RVPBinaryAABIntrinsics;
+  defm sati       : RVPBinaryAABIntrinsics;
+  defm pmulhrsu_h : RVPBinaryAABIntrinsics;
+  defm pmulhrsu_w : RVPBinaryAABIntrinsics;
+  defm mulhsu_h0  : RVPBinaryAABIntrinsics;
+  defm mulhsu_h1  : RVPBinaryAABIntrinsics;
+  defm mulhrsu    : RVPBinaryAABIntrinsics;
 
   class RVPBinaryABBIntrinsics
       : Intrinsic<[llvm_any_ty],
@@ -2104,6 +2121,10 @@ let TargetPrefix = "riscv" in {
   defm mulu_w00    : RVPBinaryABBIntrinsics;
   defm mulu_h11    : RVPBinaryABBIntrinsics;
   defm mulu_w11    : RVPBinaryABBIntrinsics;
+  defm pmulh_h_b0  : RVPBinaryABBIntrinsics;
+  defm pmulh_w_h0  : RVPBinaryABBIntrinsics;
+  defm pmulh_h_b1  : RVPBinaryABBIntrinsics;
+  defm pmulh_w_h1  : RVPBinaryABBIntrinsics;
 
   class RVPBinaryABCIntrinsics
       : Intrinsic<[llvm_any_ty],
@@ -2122,6 +2143,12 @@ let TargetPrefix = "riscv" in {
   defm mulsu_w00         : RVPBinaryABCIntrinsics;
   defm mulsu_h11         : RVPBinaryABCIntrinsics;
   defm mulsu_w11         : RVPBinaryABCIntrinsics;
+  defm pmulhsu_h         : RVPBinaryABCIntrinsics;
+  defm pmulhsu_w         : RVPBinaryABCIntrinsics;
+  defm pmulhsu_h_b0      : RVPBinaryABCIntrinsics;
+  defm pmulhsu_w_h0      : RVPBinaryABCIntrinsics;
+  defm pmulhsu_h_b1      : RVPBinaryABCIntrinsics;
+  defm pmulhsu_w_h1      : RVPBinaryABCIntrinsics;
 } // TargetPrefix = "riscv"
 
 // Vendor extensions
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 7b249581b0b93..f11cf7fe167f9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -978,6 +978,16 @@ def : PatGprGpr<int_riscv_pmax_b,       PMAX_B>;
 def : PatGprGpr<int_riscv_pmax_h,       PMAX_H>;
 def : PatGprGpr<int_riscv_pmaxu_b,      PMAXU_B>;
 def : PatGprGpr<int_riscv_pmaxu_h,      PMAXU_H>;
+def : PatGprGpr<int_riscv_pmulh_h,      PMULH_H>;
+def : PatGprGpr<int_riscv_pmulh_h_b0,   PMULH_H_B0>;
+def : PatGprGpr<int_riscv_pmulh_h_b1,   PMULH_H_B1>;
+def : PatGprGpr<int_riscv_pmulhu_h,     PMULHU_H>;
+def : PatGprGpr<int_riscv_pmulhr_h,     PMULHR_H>;
+def : PatGprGpr<int_riscv_pmulhru_h,    PMULHRU_H>;
+def : PatGprGpr<int_riscv_pmulhsu_h,    PMULHSU_H>;
+def : PatGprGpr<int_riscv_pmulhsu_h_b0, PMULHSU_H_B0>;
+def : PatGprGpr<int_riscv_pmulhsu_h_b1, PMULHSU_H_B1>;
+def : PatGprGpr<int_riscv_pmulhrsu_h,   PMULHRSU_H>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -1010,6 +1020,13 @@ def : PatGprGpr<int_riscv_packt,     PACKT_RV32>;
 def : PatGprGpr<int_riscv_mseq,      MSEQ>;
 def : PatGprGpr<int_riscv_mslt,      MSLT>;
 def : PatGprGpr<int_riscv_msltu,     MSLTU>;
+def : PatGprGpr<int_riscv_mulh_h1,   MULH_H1>;
+def : PatGprGpr<int_riscv_mulhr,     MULHR>;
+def : PatGprGpr<int_riscv_mulhru,    MULHRU>;
+def : PatGprGpr<int_riscv_mulh_h0,   MULH_H0>;
+def : PatGprGpr<int_riscv_mulhsu_h0, MULHSU_H0>;
+def : PatGprGpr<int_riscv_mulhsu_h1, MULHSU_H1>;
+def : PatGprGpr<int_riscv_mulhrsu,   MULHRSU>;
 } // Predicates = [HasStdExtP, IsRV32]
 
 let Predicates = [HasStdExtP, IsRV64] in {
@@ -1089,4 +1106,14 @@ def : PatGprGpr<int_riscv_pmin_w,       PMIN_W>;
 def : PatGprGpr<int_riscv_pminu_w,      PMINU_W>;
 def : PatGprGpr<int_riscv_pmax_w,       PMAX_W>;
 def : PatGprGpr<int_riscv_pmaxu_w,      PMAXU_W>;
+def : PatGprGpr<int_riscv_pmulh_w,      PMULH_W>;
+def : PatGprGpr<int_riscv_pmulh_w_h0,   PMULH_W_H0>;
+def : PatGprGpr<int_riscv_pmulh_w_h1,   PMULH_W_H1>;
+def : PatGprGpr<int_riscv_pmulhu_w,     PMULHU_W>;
+def : PatGprGpr<int_riscv_pmulhr_w,     PMULHR_W>;
+def : PatGprGpr<int_riscv_pmulhru_w,    PMULHRU_W>;
+def : PatGprGpr<int_riscv_pmulhsu_w,    PMULHSU_W>;
+def : PatGprGpr<int_riscv_pmulhsu_w_h0, PMULHSU_W_H0>;
+def : PatGprGpr<int_riscv_pmulhsu_w_h1, PMULHSU_W_H1>;
+def : PatGprGpr<int_riscv_pmulhrsu_w,   PMULHRSU_W>;
 } // Predicates = [HasStdExtP, IsRV64]
\ No newline at end of file
diff --git a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
index 49073b5821dcb..4300fc787d17a 100644
--- a/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv32simd-intrinsic.ll
@@ -1233,3 +1233,190 @@ define i32 @pmaxu_h(i32 %a, i32 %b) {
     %tmp = call i32 @llvm.riscv.pmaxu.h.i32(i32 %a, i32 %b)
     ret i32 %tmp
 }
+
+declare i32 @llvm.riscv.pmulh.h.i32(i32, i32)
+
+define i32 @pmulh_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulh_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulh.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulh.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulh.h.b0.i32.i32(i32, i32)
+
+define i32 @pmulh_h_b0(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulh_h_b0:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulh.h.b0 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulh.h.b0.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulh.h.b1.i32.i32(i32, i32)
+
+define i32 @pmulh_h_b1(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulh_h_b1:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulh.h.b1 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulh.h.b1.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulhu.h.i32(i32, i32)
+
+define i32 @pmulhu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulhu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulhu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulhu.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulhr.h.i32(i32, i32)
+
+define i32 @pmulhr_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulhr_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulhr.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulhr.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulhru.h.i32(i32, i32)
+
+define i32 @pmulhru_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulhru_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulhru.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulhru.h.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulhsu.h.i32.i32.i32(i32, i32)
+
+define i32 @pmulhsu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulhsu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulhsu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulhsu.h.i32.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulhsu.h.b0.i32.i32.i32(i32, i32)
+
+define i32 @pmulhsu_h_b0(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulhsu_h_b0:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulhsu.h.b0 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulhsu.h.b0.i32.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulhsu.h.b1.i32.i32.i32(i32, i32)
+
+define i32 @pmulhsu_h_b1(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulhsu_h_b1:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulhsu.h.b1 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulhsu.h.b1.i32.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.pmulhrsu.h.i32.i32(i32, i32)
+
+define i32 @pmulhrsu_h(i32 %a, i32 %b) {
+; RV32P-LABEL: pmulhrsu_h:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    pmulhrsu.h a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.pmulhrsu.h.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulh.h1.i32(i32, i32)
+
+define i32 @mulh_h1(i32 %a, i32 %b) {
+; RV32P-LABEL: mulh_h1:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulh.h1 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulh.h1.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulhr.i32(i32, i32)
+
+define i32 @mulhr(i32 %a, i32 %b) {
+; RV32P-LABEL: mulhr:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulhr a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulhr.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulhru.i32(i32, i32)
+
+define i32 @mulhru(i32 %a, i32 %b) {
+; RV32P-LABEL: mulhru:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulhru a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulhru.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulh.h0.i32(i32, i32)
+
+define i32 @mulh_h0(i32 %a, i32 %b) {
+; RV32P-LABEL: mulh_h0:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulh.h0 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulh.h0.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulhsu.h0.i32.i32(i32, i32)
+
+define i32 @mulhsu_h0(i32 %a, i32 %b) {
+; RV32P-LABEL: mulhsu_h0:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulhsu.h0 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulhsu.h0.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulhsu.h1.i32.i32(i32, i32)
+
+define i32 @mulhsu_h1(i32 %a, i32 %b) {
+; RV32P-LABEL: mulhsu_h1:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulhsu.h1 a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulhsu.h1.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
+
+declare i32 @llvm.riscv.mulhrsu.i32.i32(i32, i32)
+
+define i32 @mulhrsu(i32 %a, i32 %b) {
+; RV32P-LABEL: mulhrsu:
+; RV32P:       # %bb.0:
+; RV32P-NEXT:    mulhrsu a0, a0, a1
+; RV32P-NEXT:    ret
+    %tmp = call i32 @llvm.riscv.mulhrsu.i32.i32(i32 %a, i32 %b)
+    ret i32 %tmp
+}
diff --git a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
index 5e6d242e9c860..2f20337dc3ec0 100644
--- a/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
+++ b/llvm/test/CodeGen/RISCV/rv64simd-intrinsic.ll
@@ -1750,3 +1750,223 @@ define i64 @pmaxu_w(i64 %a, i64 %b) {
     %tmp = call i64 @llvm.riscv.pmaxu.w.i64(i64 %a, i64 %b)
     ret i64 %tmp
 }
+
+declare i64 @llvm.riscv.pmulh.h.i64(i64, i64)
+
+define i64 @pmulh_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulh_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulh.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulh.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulh.w.i64(i64, i64)
+
+define i64 @pmulh_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulh_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulh.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulh.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulh.h.b0.i64.i64(i64, i64)
+
+define i64 @pmulh_h_b0(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulh_h_b0:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulh.h.b0 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulh.h.b0.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulh.w.h0.i64.i64(i64, i64)
+
+define i64 @pmulh_w_h0(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulh_w_h0:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulh.w.h0 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulh.w.h0.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulh.h.b1.i64.i64(i64, i64)
+
+define i64 @pmulh_h_b1(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulh_h_b1:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulh.h.b1 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulh.h.b1.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulh.w.h1.i64.i64(i64, i64)
+
+define i64 @pmulh_w_h1(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulh_w_h1:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulh.w.h1 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulh.w.h1.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhu.h.i64(i64, i64)
+
+define i64 @pmulhu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhu.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhu.w.i64(i64, i64)
+
+define i64 @pmulhu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhu.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhr.h.i64(i64, i64)
+
+define i64 @pmulhr_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhr_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhr.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhr.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhr.w.i64(i64, i64)
+
+define i64 @pmulhr_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhr_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhr.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhr.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhru.h.i64(i64, i64)
+
+define i64 @pmulhru_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhru_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhru.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhru.h.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhru.w.i64(i64, i64)
+
+define i64 @pmulhru_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhru_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhru.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhru.w.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhsu.h.i64.i64.i64(i64, i64)
+
+define i64 @pmulhsu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhsu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhsu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhsu.h.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhsu.w.i64.i64.i64(i64, i64)
+
+define i64 @pmulhsu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhsu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhsu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhsu.w.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhsu.h.b0.i64.i64.i64(i64, i64)
+
+define i64 @pmulhsu_h_b0(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhsu_h_b0:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhsu.h.b0 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhsu.h.b0.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhsu.w.h0.i64.i64.i64(i64, i64)
+
+define i64 @pmulhsu_w_h0(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhsu_w_h0:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhsu.w.h0 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhsu.w.h0.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhsu.h.b1.i64.i64.i64(i64, i64)
+
+define i64 @pmulhsu_h_b1(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhsu_h_b1:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhsu.h.b1 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhsu.h.b1.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhsu.w.h1.i64.i64.i64(i64, i64)
+
+define i64 @pmulhsu_w_h1(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhsu_w_h1:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhsu.w.h1 a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhsu.w.h1.i64.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhrsu.h.i64.i64(i64, i64)
+
+define i64 @pmulhrsu_h(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhrsu_h:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhrsu.h a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhrsu.h.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}
+
+declare i64 @llvm.riscv.pmulhrsu.w.i64.i64(i64, i64)
+
+define i64 @pmulhrsu_w(i64 %a, i64 %b) {
+; RV64P-LABEL: pmulhrsu_w:
+; RV64P:       # %bb.0:
+; RV64P-NEXT:    pmulhrsu.w a0, a0, a1
+; RV64P-NEXT:    ret
+    %tmp = call i64 @llvm.riscv.pmulhrsu.w.i64.i64(i64 %a, i64 %b)
+    ret i64 %tmp
+}

>From b2cd68064bab32b89e3304b2ac1e4454b5c67cd6 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 22:09:58 +0800
Subject: [PATCH 22/40] [RISCV] Packed Unsigned Saturating Immediate C
 intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  5 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 16 ++++++++++
 clang/lib/Headers/riscv_simd.h                | 25 ++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 18 +++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 30 +++++++++++++++++++
 5 files changed, 94 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index ee60709008610..1537051d944ab 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -150,6 +150,8 @@ def psll_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def padd_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def padd_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def sadd        : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pusati_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def usati_32    : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -164,6 +166,9 @@ def psll_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def padd_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def padd_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def padd_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pusati_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pusati_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def usati_64    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 00ede2b2de64e..cdd7a17e350b2 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -304,6 +304,11 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_psslai_h_32:
   case RISCV::BI__builtin_riscv_psslai_h_64:
   case RISCV::BI__builtin_riscv_psslai_w:
+  case RISCV::BI__builtin_riscv_pusati_h_32:
+  case RISCV::BI__builtin_riscv_pusati_h_64:
+  case RISCV::BI__builtin_riscv_pusati_w:
+  case RISCV::BI__builtin_riscv_usati_32:
+  case RISCV::BI__builtin_riscv_usati_64:
   case RISCV::BI__builtin_riscv_sslai: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -328,6 +333,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_sslai:
       ID = Intrinsic::riscv_sslai;
       break;
+    case RISCV::BI__builtin_riscv_pusati_h_32:
+    case RISCV::BI__builtin_riscv_pusati_h_64:
+      ID = Intrinsic::riscv_pusati_h;
+      break;
+    case RISCV::BI__builtin_riscv_pusati_w:
+      ID = Intrinsic::riscv_pusati_w;
+      break;
+    case RISCV::BI__builtin_riscv_usati_32:
+    case RISCV::BI__builtin_riscv_usati_64:
+      ID = Intrinsic::riscv_usati;
+      break;
     }
     IntrinsicTypes = {ResultType, Ops[1]->getType()};
     break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index f3c6d20853b54..930e455e6b678 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -60,6 +60,16 @@ __riscv_padd_hs(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_padd_hs_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pusati_h(uint32_t __x, int __y) {
+  return __builtin_riscv_pusati_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_usati(uint32_t __x, int __y) {
+  return __builtin_riscv_usati_32(__x, __y);
+}
+
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
   return __builtin_riscv_sadd(__x, __y);
@@ -122,6 +132,21 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_padd_ws(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_padd_ws(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pusati_h(uint64_t __x, int __y) {
+  return __builtin_riscv_pusati_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pusati_w(uint64_t __x, int __y) {
+  return __builtin_riscv_pusati_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_usati(uint64_t __x, int __y) {
+  return __builtin_riscv_usati_64(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index feedca5c72817..8b42471ae9971 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -77,6 +77,24 @@ uint32_t padd_hs(uint32_t rs1, uint32_t rs2) {
   return __riscv_padd_hs(rs1, rs2);
 }
 
+// RV32P-LABEL: @pusati_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pusati.h.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pusati_h(uint32_t rs1, int32_t rs2) {
+  return __riscv_pusati_h(rs1, 1);
+}
+
+// RV32P-LABEL: @usati(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.usati.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t usati(uint32_t rs1, int32_t rs2) {
+  return __riscv_usati(rs1, 1);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 4dc1cad0905e3..257b6874e028b 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -108,3 +108,33 @@ uint64_t padd_hs(uint64_t rs1, uint64_t rs2) {
 uint64_t padd_ws(uint64_t rs1, uint64_t rs2) {
   return __riscv_padd_ws(rs1, rs2);
 }
+
+// RV64P-LABEL: @pusati_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pusati.h.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pusati_h(uint64_t rs1, int64_t rs2) {
+  return __riscv_pusati_h(rs1, 1);
+}
+
+// RV64P-LABEL: @pusati_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pusati.w.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pusati_w(uint64_t rs1, int64_t rs2) {
+  return __riscv_pusati_w(rs1, 1);
+}
+
+// RV64P-LABEL: @usati(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.usati.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t usati(uint64_t rs1, int64_t rs2) {
+  return __riscv_usati(rs1, 1);
+}

>From e3e08b6cd1a841307919b760e1e216e1d32d6fbe Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 22:18:00 +0800
Subject: [PATCH 23/40] [RISCV] Packed Arithmetic Shift Right Immediate C
 intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    | 10 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 32 ++++++++++
 clang/lib/Headers/riscv_simd.h                | 50 ++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 36 +++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 60 +++++++++++++++++++
 5 files changed, 188 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 1537051d944ab..8a4c0b593c4cc 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -152,6 +152,10 @@ def padd_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def sadd        : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 def pusati_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def usati_32    : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psrai_b_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psrai_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psrari_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def srari_32    : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -169,6 +173,12 @@ def padd_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pusati_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def pusati_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def usati_64    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrai_b_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrai_h_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrai_w     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrari_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrari_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def srari_64    : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index cdd7a17e350b2..947993682ec9c 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -309,6 +309,16 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pusati_w:
   case RISCV::BI__builtin_riscv_usati_32:
   case RISCV::BI__builtin_riscv_usati_64:
+  case RISCV::BI__builtin_riscv_psrai_b_32:
+  case RISCV::BI__builtin_riscv_psrai_b_64:
+  case RISCV::BI__builtin_riscv_psrai_h_32:
+  case RISCV::BI__builtin_riscv_psrai_h_64:
+  case RISCV::BI__builtin_riscv_psrai_w:
+  case RISCV::BI__builtin_riscv_psrari_h_32:
+  case RISCV::BI__builtin_riscv_psrari_h_64:
+  case RISCV::BI__builtin_riscv_psrari_w:
+  case RISCV::BI__builtin_riscv_srari_32:
+  case RISCV::BI__builtin_riscv_srari_64:
   case RISCV::BI__builtin_riscv_sslai: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -344,6 +354,28 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_usati_64:
       ID = Intrinsic::riscv_usati;
       break;
+    case RISCV::BI__builtin_riscv_psrai_b_32:
+    case RISCV::BI__builtin_riscv_psrai_b_64:
+      ID = Intrinsic::riscv_psrai_b;
+      break;
+    case RISCV::BI__builtin_riscv_psrai_h_32:
+    case RISCV::BI__builtin_riscv_psrai_h_64:
+      ID = Intrinsic::riscv_psrai_h;
+      break;
+    case RISCV::BI__builtin_riscv_psrai_w:
+      ID = Intrinsic::riscv_psrai_w;
+      break;
+    case RISCV::BI__builtin_riscv_psrari_h_32:
+    case RISCV::BI__builtin_riscv_psrari_h_64:
+      ID = Intrinsic::riscv_psrari_h;
+      break;
+    case RISCV::BI__builtin_riscv_psrari_w:
+      ID = Intrinsic::riscv_psrari_w;
+      break;
+    case RISCV::BI__builtin_riscv_srari_32:
+    case RISCV::BI__builtin_riscv_srari_64:
+      ID = Intrinsic::riscv_srari;
+      break;
     }
     IntrinsicTypes = {ResultType, Ops[1]->getType()};
     break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 930e455e6b678..5af768165b5cb 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -70,6 +70,26 @@ __riscv_usati(uint32_t __x, int __y) {
   return __builtin_riscv_usati_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrai_b(uint32_t __x, int __y) {
+  return __builtin_riscv_psrai_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrai_h(uint32_t __x, int __y) {
+  return __builtin_riscv_psrai_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrari_h(uint32_t __x, int __y) {
+  return __builtin_riscv_psrari_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_srari(int32_t __x, int __y) {
+  return __builtin_riscv_srari_32(__x, __y);
+}
+
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
   return __builtin_riscv_sadd(__x, __y);
@@ -147,6 +167,36 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_usati(uint64_t __x, int __y) {
   return __builtin_riscv_usati_64(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrai_b(uint64_t __x, int __y) {
+  return __builtin_riscv_psrai_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrai_h(uint64_t __x, int __y) {
+  return __builtin_riscv_psrai_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrai_w(uint64_t __x, int __y) {
+  return __builtin_riscv_psrai_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrari_h(uint64_t __x, int __y) {
+  return __builtin_riscv_psrari_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrari_w(uint64_t __x, int __y) {
+  return __builtin_riscv_psrari_w(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_srari(int64_t __x, int __y) {
+  return __builtin_riscv_srari_64(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 8b42471ae9971..7c481d8ee30a2 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -95,6 +95,42 @@ uint32_t usati(uint32_t rs1, int32_t rs2) {
   return __riscv_usati(rs1, 1);
 }
 
+// RV32P-LABEL: @psrai_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psrai.b.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psrai_b(uint32_t rs1, int32_t rs2) {
+  return __riscv_psrai_b(rs1, 1);
+}
+
+// RV32P-LABEL: @psrai_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psrai.h.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psrai_h(uint32_t rs1, int32_t rs2) {
+  return __riscv_psrai_h(rs1, 1);
+}
+
+// RV32P-LABEL: @psrari_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psrari.h.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psrari_h(uint32_t rs1, int32_t rs2) {
+  return __riscv_psrari_h(rs1, 1);
+}
+
+// RV32P-LABEL: @srari(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.srari.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t srari(int32_t rs1, int32_t rs2) {
+  return __riscv_srari(rs1, 1);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 257b6874e028b..83254cee021f5 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -138,3 +138,63 @@ uint64_t pusati_w(uint64_t rs1, int64_t rs2) {
 uint64_t usati(uint64_t rs1, int64_t rs2) {
   return __riscv_usati(rs1, 1);
 }
+
+// RV64P-LABEL: @psrai_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psrai.b.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psrai_b(uint64_t rs1, int64_t rs2) {
+  return __riscv_psrai_b(rs1, 1);
+}
+
+// RV64P-LABEL: @psrai_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psrai.h.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psrai_h(uint64_t rs1, int64_t rs2) {
+  return __riscv_psrai_h(rs1, 1);
+}
+
+// RV64P-LABEL: @psrai_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psrai.w.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psrai_w(uint64_t rs1, int64_t rs2) {
+  return __riscv_psrai_w(rs1, 1);
+}
+
+// RV64P-LABEL: @psrari_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psrari.h.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psrari_h(uint64_t rs1, int64_t rs2) {
+  return __riscv_psrari_h(rs1, 1);
+}
+
+// RV64P-LABEL: @psrari_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psrari.w.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psrari_w(uint64_t rs1, int64_t rs2) {
+  return __riscv_psrari_w(rs1, 1);
+}
+
+// RV64P-LABEL: @srari(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.srari.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t srari(int64_t rs1, int64_t rs2) {
+  return __riscv_srari(rs1, 1);
+}

>From 2938ec2125f308b3c0c12d89f281e32078b9d3d5 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 22:23:16 +0800
Subject: [PATCH 24/40] [RISCV] Packed Signed Saturating Immediate C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  5 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 16 ++++++++++
 clang/lib/Headers/riscv_simd.h                | 25 ++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 18 +++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 30 +++++++++++++++++++
 5 files changed, 94 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 8a4c0b593c4cc..f7394ded821ac 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -156,6 +156,8 @@ def psrai_b_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def psrai_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def psrari_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def srari_32    : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def psati_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def sati_32     : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -179,6 +181,9 @@ def psrai_w     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def psrari_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def psrari_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def srari_64    : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def psati_h_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psati_w     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def sati_64     : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 947993682ec9c..788faaeaa6431 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -319,6 +319,11 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_psrari_w:
   case RISCV::BI__builtin_riscv_srari_32:
   case RISCV::BI__builtin_riscv_srari_64:
+  case RISCV::BI__builtin_riscv_psati_h_32:
+  case RISCV::BI__builtin_riscv_psati_h_64:
+  case RISCV::BI__builtin_riscv_psati_w:
+  case RISCV::BI__builtin_riscv_sati_32:
+  case RISCV::BI__builtin_riscv_sati_64:
   case RISCV::BI__builtin_riscv_sslai: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -376,6 +381,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_srari_64:
       ID = Intrinsic::riscv_srari;
       break;
+    case RISCV::BI__builtin_riscv_psati_h_32:
+    case RISCV::BI__builtin_riscv_psati_h_64:
+      ID = Intrinsic::riscv_psati_h;
+      break;
+    case RISCV::BI__builtin_riscv_psati_w:
+      ID = Intrinsic::riscv_psati_w;
+      break;
+    case RISCV::BI__builtin_riscv_sati_32:
+    case RISCV::BI__builtin_riscv_sati_64:
+      ID = Intrinsic::riscv_sati;
+      break;
     }
     IntrinsicTypes = {ResultType, Ops[1]->getType()};
     break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 5af768165b5cb..bd0dae9c5d3bf 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -90,6 +90,16 @@ __riscv_srari(int32_t __x, int __y) {
   return __builtin_riscv_srari_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psati_h(uint32_t __x, int __y) {
+  return __builtin_riscv_psati_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sati(int32_t __x, int __y) {
+  return __builtin_riscv_sati_32(__x, __y);
+}
+
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
   return __builtin_riscv_sadd(__x, __y);
@@ -197,6 +207,21 @@ static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_srari(int64_t __x, int __y) {
   return __builtin_riscv_srari_64(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psati_h(uint64_t __x, int __y) {
+  return __builtin_riscv_psati_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psati_w(uint64_t __x, int __y) {
+  return __builtin_riscv_psati_w(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_sati(int64_t __x, int __y) {
+  return __builtin_riscv_sati_64(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 7c481d8ee30a2..88af5da8ee60a 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -131,6 +131,24 @@ int32_t srari(int32_t rs1, int32_t rs2) {
   return __riscv_srari(rs1, 1);
 }
 
+// RV32P-LABEL: @psati_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psati.h.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psati_h(uint32_t rs1, int32_t rs2) {
+  return __riscv_psati_h(rs1, 1);
+}
+
+// RV32P-LABEL: @sati(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sati.i32.i32(i32 [[RS1:%.*]], i32 1)
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t sati(int32_t rs1, int32_t rs2) {
+  return __riscv_sati(rs1, 1);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 83254cee021f5..aeb518bd5dbaf 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -198,3 +198,33 @@ uint64_t psrari_w(uint64_t rs1, int64_t rs2) {
 int64_t srari(int64_t rs1, int64_t rs2) {
   return __riscv_srari(rs1, 1);
 }
+
+// RV64P-LABEL: @psati_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psati.h.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psati_h(uint64_t rs1, int64_t rs2) {
+  return __riscv_psati_h(rs1, 1);
+}
+
+// RV64P-LABEL: @psati_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psati.w.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psati_w(uint64_t rs1, int64_t rs2) {
+  return __riscv_psati_w(rs1, 1);
+}
+
+// RV64P-LABEL: @sati(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[CONV_I:%.*]] = sext i32 1 to i64
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.sati.i64.i64(i64 [[RS1:%.*]], i64 [[CONV_I]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t sati(int64_t rs1, int64_t rs2) {
+  return __riscv_sati(rs1, 1);
+}

>From be81ee52f3d5c5b418d85e0e95f4e7ec0a56cfa1 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 22:26:15 +0800
Subject: [PATCH 25/40] [RISCV] Packed Shift Right Logical Register C
 intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  5 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 16 +++++++++++
 clang/lib/Headers/riscv_simd.h                | 25 +++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 18 +++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 27 +++++++++++++++++++
 5 files changed, 91 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index f7394ded821ac..b111a232b3335 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -158,6 +158,8 @@ def psrari_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def srari_32    : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 def psati_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def sati_32     : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def psrl_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psrl_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -184,6 +186,9 @@ def srari_64    : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
 def psati_h_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def psati_w     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
 def sati_64     : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def psrl_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psrl_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psrl_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 788faaeaa6431..c3de62a6d21fa 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -219,6 +219,11 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_padd_hs_32:
   case RISCV::BI__builtin_riscv_padd_hs_64:
   case RISCV::BI__builtin_riscv_padd_ws:
+  case RISCV::BI__builtin_riscv_psrl_bs_32:
+  case RISCV::BI__builtin_riscv_psrl_bs_64:
+  case RISCV::BI__builtin_riscv_psrl_hs_32:
+  case RISCV::BI__builtin_riscv_psrl_hs_64:
+  case RISCV::BI__builtin_riscv_psrl_ws:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -287,6 +292,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_padd_ws:
       ID = Intrinsic::riscv_padd_ws;
       break;
+    case RISCV::BI__builtin_riscv_psrl_bs_32:
+    case RISCV::BI__builtin_riscv_psrl_bs_64:
+      ID = Intrinsic::riscv_psrl_bs;
+      break;
+    case RISCV::BI__builtin_riscv_psrl_hs_32:
+    case RISCV::BI__builtin_riscv_psrl_hs_64:
+      ID = Intrinsic::riscv_psrl_hs;
+      break;
+    case RISCV::BI__builtin_riscv_psrl_ws:
+      ID = Intrinsic::riscv_psrl_ws;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index bd0dae9c5d3bf..7b04db55bd8fc 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -100,6 +100,16 @@ __riscv_sati(int32_t __x, int __y) {
   return __builtin_riscv_sati_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_bs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psrl_bs_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_hs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psrl_hs_32(__x, __y);
+}
+
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
   return __builtin_riscv_sadd(__x, __y);
@@ -222,6 +232,21 @@ static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sati(int64_t __x, int __y) {
   return __builtin_riscv_sati_64(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_bs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psrl_bs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_hs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psrl_hs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_ws(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psrl_ws(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 88af5da8ee60a..2af89287d41ca 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -149,6 +149,24 @@ int32_t sati(int32_t rs1, int32_t rs2) {
   return __riscv_sati(rs1, 1);
 }
 
+// RV32P-LABEL: @psrl_bs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psrl.bs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psrl_bs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psrl_bs(rs1, rs2);
+}
+
+// RV32P-LABEL: @psrl_hs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psrl.hs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psrl_hs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psrl_hs(rs1, rs2);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index aeb518bd5dbaf..3e613aa15ef18 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -228,3 +228,30 @@ uint64_t psati_w(uint64_t rs1, int64_t rs2) {
 int64_t sati(int64_t rs1, int64_t rs2) {
   return __riscv_sati(rs1, 1);
 }
+
+// RV64P-LABEL: @psrl_bs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psrl.bs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psrl_bs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psrl_bs(rs1, rs2);
+}
+
+// RV64P-LABEL: @psrl_hs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psrl.hs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psrl_hs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psrl_hs(rs1, rs2);
+}
+
+// RV64P-LABEL: @psrl_ws(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psrl.ws.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psrl_ws(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psrl_ws(rs1, rs2);
+}

>From c7bcc0782057b144b066fe7bd0b1d8b459dac9d8 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 22:31:37 +0800
Subject: [PATCH 26/40] [RISCV] Packed Reduction Sum C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    | 10 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 32 +++++++++++
 clang/lib/Headers/riscv_simd.h                | 50 +++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 36 +++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 54 +++++++++++++++++++
 5 files changed, 182 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index b111a232b3335..0739162f2745b 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -160,6 +160,10 @@ def psati_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def sati_32     : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 def psrl_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def psrl_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def predsum_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def predsum_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def predsumu_bs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def predsumu_hs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -189,6 +193,12 @@ def sati_64     : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
 def psrl_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def psrl_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def psrl_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsum_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsum_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsum_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsumu_bs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsumu_hs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsumu_ws    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index c3de62a6d21fa..386f6321959d0 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -224,6 +224,16 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_psrl_hs_32:
   case RISCV::BI__builtin_riscv_psrl_hs_64:
   case RISCV::BI__builtin_riscv_psrl_ws:
+  case RISCV::BI__builtin_riscv_predsum_bs_32:
+  case RISCV::BI__builtin_riscv_predsum_bs_64:
+  case RISCV::BI__builtin_riscv_predsum_hs_32:
+  case RISCV::BI__builtin_riscv_predsum_hs_64:
+  case RISCV::BI__builtin_riscv_predsum_ws:
+  case RISCV::BI__builtin_riscv_predsumu_bs_32:
+  case RISCV::BI__builtin_riscv_predsumu_bs_64:
+  case RISCV::BI__builtin_riscv_predsumu_hs_32:
+  case RISCV::BI__builtin_riscv_predsumu_hs_64:
+  case RISCV::BI__builtin_riscv_predsumu_ws:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -303,6 +313,28 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_psrl_ws:
       ID = Intrinsic::riscv_psrl_ws;
       break;
+    case RISCV::BI__builtin_riscv_predsum_bs_32:
+    case RISCV::BI__builtin_riscv_predsum_bs_64:
+      ID = Intrinsic::riscv_predsum_bs;
+      break;
+    case RISCV::BI__builtin_riscv_predsum_hs_32:
+    case RISCV::BI__builtin_riscv_predsum_hs_64:
+      ID = Intrinsic::riscv_predsum_hs;
+      break;
+    case RISCV::BI__builtin_riscv_predsum_ws:
+      ID = Intrinsic::riscv_predsum_ws;
+      break;
+    case RISCV::BI__builtin_riscv_predsumu_bs_32:
+    case RISCV::BI__builtin_riscv_predsumu_bs_64:
+      ID = Intrinsic::riscv_predsumu_bs;
+      break;
+    case RISCV::BI__builtin_riscv_predsumu_hs_32:
+    case RISCV::BI__builtin_riscv_predsumu_hs_64:
+      ID = Intrinsic::riscv_predsumu_hs;
+      break;
+    case RISCV::BI__builtin_riscv_predsumu_ws:
+      ID = Intrinsic::riscv_predsumu_ws;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 7b04db55bd8fc..4e5445705227f 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -110,6 +110,26 @@ __riscv_psrl_hs(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_psrl_hs_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsum_bs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_predsum_bs_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsum_hs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_predsum_hs_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsumu_bs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_predsumu_bs_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsumu_hs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_predsumu_hs_32(__x, __y);
+}
+
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
   return __builtin_riscv_sadd(__x, __y);
@@ -247,6 +267,36 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_psrl_ws(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_psrl_ws(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsum_bs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_predsum_bs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsum_hs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_predsum_hs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsum_ws(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_predsum_ws(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsumu_bs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_predsumu_bs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsumu_hs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_predsumu_hs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_predsumu_ws(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_predsumu_ws(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 2af89287d41ca..8048164c99743 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -167,6 +167,42 @@ uint32_t psrl_hs(uint32_t rs1, uint32_t rs2) {
   return __riscv_psrl_hs(rs1, rs2);
 }
 
+// RV32P-LABEL: @predsum_bs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.predsum.bs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t predsum_bs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_predsum_bs(rs1, rs2);
+}
+
+// RV32P-LABEL: @predsum_hs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.predsum.hs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t predsum_hs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_predsum_hs(rs1, rs2);
+}
+
+// RV32P-LABEL: @predsumu_bs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.predsumu.bs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t predsumu_bs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_predsumu_bs(rs1, rs2);
+}
+
+// RV32P-LABEL: @predsumu_hs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.predsumu.hs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t predsumu_hs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_predsumu_hs(rs1, rs2);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 3e613aa15ef18..667b0da8165e3 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -255,3 +255,57 @@ uint64_t psrl_hs(uint64_t rs1, uint64_t rs2) {
 uint64_t psrl_ws(uint64_t rs1, uint64_t rs2) {
   return __riscv_psrl_ws(rs1, rs2);
 }
+
+// RV64P-LABEL: @predsum_bs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.predsum.bs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t predsum_bs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_predsum_bs(rs1, rs2);
+}
+
+// RV64P-LABEL: @predsum_hs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.predsum.hs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t predsum_hs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_predsum_hs(rs1, rs2);
+}
+
+// RV64P-LABEL: @predsum_ws(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.predsum.ws.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t predsum_ws(uint64_t rs1, uint64_t rs2) {
+  return __riscv_predsum_ws(rs1, rs2);
+}
+
+// RV64P-LABEL: @predsumu_bs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.predsumu.bs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t predsumu_bs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_predsumu_bs(rs1, rs2);
+}
+
+// RV64P-LABEL: @predsumu_hs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.predsumu.hs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t predsumu_hs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_predsumu_hs(rs1, rs2);
+}
+
+// RV64P-LABEL: @predsumu_ws(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.predsumu.ws.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t predsumu_ws(uint64_t rs1, uint64_t rs2) {
+  return __riscv_predsumu_ws(rs1, rs2);
+}

>From 3e198d9c0942f91c28421499fe81f6d962ef9bd8 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 22:59:26 +0800
Subject: [PATCH 27/40] [RISCV] Packed Arithmetic Shift Right Register C
 intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  5 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 16 +++++++++++
 clang/lib/Headers/riscv_simd.h                | 25 +++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 18 +++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 27 +++++++++++++++++++
 5 files changed, 91 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 0739162f2745b..9c4a6559eadfd 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -164,6 +164,8 @@ def predsum_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def predsum_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def predsumu_bs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def predsumu_hs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psra_bs_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psra_hs_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -199,6 +201,9 @@ def predsum_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def predsumu_bs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def predsumu_hs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def predsumu_ws    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psra_bs_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psra_hs_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psra_ws        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 386f6321959d0..213d8d2bcb6ea 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -234,6 +234,11 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_predsumu_hs_32:
   case RISCV::BI__builtin_riscv_predsumu_hs_64:
   case RISCV::BI__builtin_riscv_predsumu_ws:
+  case RISCV::BI__builtin_riscv_psra_bs_32:
+  case RISCV::BI__builtin_riscv_psra_bs_64:
+  case RISCV::BI__builtin_riscv_psra_hs_32:
+  case RISCV::BI__builtin_riscv_psra_hs_64:
+  case RISCV::BI__builtin_riscv_psra_ws:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -335,6 +340,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_predsumu_ws:
       ID = Intrinsic::riscv_predsumu_ws;
       break;
+    case RISCV::BI__builtin_riscv_psra_bs_32:
+    case RISCV::BI__builtin_riscv_psra_bs_64:
+      ID = Intrinsic::riscv_psra_bs;
+      break;
+    case RISCV::BI__builtin_riscv_psra_hs_32:
+    case RISCV::BI__builtin_riscv_psra_hs_64:
+      ID = Intrinsic::riscv_psra_hs;
+      break;
+    case RISCV::BI__builtin_riscv_psra_ws:
+      ID = Intrinsic::riscv_psra_ws;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 4e5445705227f..6b6bba51d5774 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -130,6 +130,16 @@ __riscv_predsumu_hs(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_predsumu_hs_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_bs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psra_bs_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_hs(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psra_hs_32(__x, __y);
+}
+
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
   return __builtin_riscv_sadd(__x, __y);
@@ -297,6 +307,21 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_predsumu_ws(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_predsumu_ws(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_bs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psra_bs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_hs(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psra_hs_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_ws(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psra_ws(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 8048164c99743..f8bed12aa8275 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -203,6 +203,24 @@ uint32_t predsumu_hs(uint32_t rs1, uint32_t rs2) {
   return __riscv_predsumu_hs(rs1, rs2);
 }
 
+// RV32P-LABEL: @psra_bs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psra.bs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psra_bs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psra_bs(rs1, rs2);
+}
+
+// RV32P-LABEL: @psra_hs(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psra.hs.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psra_hs(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psra_hs(rs1, rs2);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 667b0da8165e3..eb6cc455e4c6e 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -309,3 +309,30 @@ uint64_t predsumu_hs(uint64_t rs1, uint64_t rs2) {
 uint64_t predsumu_ws(uint64_t rs1, uint64_t rs2) {
   return __riscv_predsumu_ws(rs1, rs2);
 }
+
+// RV64P-LABEL: @psra_bs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psra.bs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psra_bs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psra_bs(rs1, rs2);
+}
+
+// RV64P-LABEL: @psra_hs(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psra.hs.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psra_hs(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psra_hs(rs1, rs2);
+}
+
+// RV64P-LABEL: @psra_ws(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psra.ws.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psra_ws(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psra_ws(rs1, rs2);
+}

>From eab53ea0fa4958348ef153b15ef89ee66e1cd756 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 23:06:15 +0800
Subject: [PATCH 28/40] [RISCV] Packed Addition and Saturating Addition C
 intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  30 +++-
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    |  92 ++++++++++++
 clang/lib/Headers/riscv_simd.h                | 140 ++++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 117 +++++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 135 +++++++++++++++++
 5 files changed, 513 insertions(+), 1 deletion(-)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 9c4a6559eadfd..579807bd121ba 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -149,7 +149,6 @@ def psll_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def psll_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def padd_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def padd_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def sadd        : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 def pusati_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def usati_32    : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 def psrai_b_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
@@ -166,6 +165,20 @@ def predsumu_bs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def predsumu_hs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def psra_bs_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def psra_hs_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def padd_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def padd_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def sadd           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def psadd_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psadd_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def aadd           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def paadd_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def paadd_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def saddu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psaddu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psaddu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def aaddu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def paaddu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def paaddu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -204,6 +217,21 @@ def predsumu_ws    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def psra_bs_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def psra_hs_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def psra_ws        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psadd_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psadd_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psadd_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paadd_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paadd_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paadd_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psaddu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psaddu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psaddu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paaddu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paaddu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paaddu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 213d8d2bcb6ea..f66455f854b45 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -239,6 +239,34 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_psra_hs_32:
   case RISCV::BI__builtin_riscv_psra_hs_64:
   case RISCV::BI__builtin_riscv_psra_ws:
+  case RISCV::BI__builtin_riscv_padd_b_32:
+  case RISCV::BI__builtin_riscv_padd_b_64:
+  case RISCV::BI__builtin_riscv_padd_h_32:
+  case RISCV::BI__builtin_riscv_padd_h_64:
+  case RISCV::BI__builtin_riscv_padd_w:
+  case RISCV::BI__builtin_riscv_psadd_b_32:
+  case RISCV::BI__builtin_riscv_psadd_b_64:
+  case RISCV::BI__builtin_riscv_psadd_h_32:
+  case RISCV::BI__builtin_riscv_psadd_h_64:
+  case RISCV::BI__builtin_riscv_psadd_w:
+  case RISCV::BI__builtin_riscv_aadd:
+  case RISCV::BI__builtin_riscv_paadd_b_32:
+  case RISCV::BI__builtin_riscv_paadd_b_64:
+  case RISCV::BI__builtin_riscv_paadd_h_32:
+  case RISCV::BI__builtin_riscv_paadd_h_64:
+  case RISCV::BI__builtin_riscv_paadd_w:
+  case RISCV::BI__builtin_riscv_saddu:
+  case RISCV::BI__builtin_riscv_psaddu_b_32:
+  case RISCV::BI__builtin_riscv_psaddu_b_64:
+  case RISCV::BI__builtin_riscv_psaddu_h_32:
+  case RISCV::BI__builtin_riscv_psaddu_h_64:
+  case RISCV::BI__builtin_riscv_psaddu_w:
+  case RISCV::BI__builtin_riscv_aaddu:
+  case RISCV::BI__builtin_riscv_paaddu_b_32:
+  case RISCV::BI__builtin_riscv_paaddu_b_64:
+  case RISCV::BI__builtin_riscv_paaddu_h_32:
+  case RISCV::BI__builtin_riscv_paaddu_h_64:
+  case RISCV::BI__builtin_riscv_paaddu_w:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -351,6 +379,70 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_psra_ws:
       ID = Intrinsic::riscv_psra_ws;
       break;
+    case RISCV::BI__builtin_riscv_padd_b_32:
+    case RISCV::BI__builtin_riscv_padd_b_64:
+      ID = Intrinsic::riscv_padd_b;
+      break;
+    case RISCV::BI__builtin_riscv_padd_h_32:
+    case RISCV::BI__builtin_riscv_padd_h_64:
+      ID = Intrinsic::riscv_padd_h;
+      break;
+    case RISCV::BI__builtin_riscv_padd_w:
+      ID = Intrinsic::riscv_padd_w;
+      break;
+    case RISCV::BI__builtin_riscv_psadd_b_32:
+    case RISCV::BI__builtin_riscv_psadd_b_64:
+      ID = Intrinsic::riscv_psadd_b;
+      break;
+    case RISCV::BI__builtin_riscv_psadd_h_32:
+    case RISCV::BI__builtin_riscv_psadd_h_64:
+      ID = Intrinsic::riscv_psadd_h;
+      break;
+    case RISCV::BI__builtin_riscv_psadd_w:
+      ID = Intrinsic::riscv_psadd_w;
+      break;
+    case RISCV::BI__builtin_riscv_aadd:
+      ID = Intrinsic::riscv_aadd;
+      break;
+    case RISCV::BI__builtin_riscv_paadd_b_32:
+    case RISCV::BI__builtin_riscv_paadd_b_64:
+      ID = Intrinsic::riscv_paadd_b;
+      break;
+    case RISCV::BI__builtin_riscv_paadd_h_32:
+    case RISCV::BI__builtin_riscv_paadd_h_64:
+      ID = Intrinsic::riscv_paadd_h;
+      break;
+    case RISCV::BI__builtin_riscv_paadd_w:
+      ID = Intrinsic::riscv_paadd_w;
+      break;
+    case RISCV::BI__builtin_riscv_saddu:
+      ID = Intrinsic::riscv_saddu;
+      break;
+    case RISCV::BI__builtin_riscv_psaddu_b_32:
+    case RISCV::BI__builtin_riscv_psaddu_b_64:
+      ID = Intrinsic::riscv_psaddu_b;
+      break;
+    case RISCV::BI__builtin_riscv_psaddu_h_32:
+    case RISCV::BI__builtin_riscv_psaddu_h_64:
+      ID = Intrinsic::riscv_psaddu_h;
+      break;
+    case RISCV::BI__builtin_riscv_psaddu_w:
+      ID = Intrinsic::riscv_psaddu_w;
+      break;
+    case RISCV::BI__builtin_riscv_aaddu:
+      ID = Intrinsic::riscv_aaddu;
+      break;
+    case RISCV::BI__builtin_riscv_paaddu_b_32:
+    case RISCV::BI__builtin_riscv_paaddu_b_64:
+      ID = Intrinsic::riscv_paaddu_b;
+      break;
+    case RISCV::BI__builtin_riscv_paaddu_h_32:
+    case RISCV::BI__builtin_riscv_paaddu_h_64:
+      ID = Intrinsic::riscv_paaddu_h;
+      break;
+    case RISCV::BI__builtin_riscv_paaddu_w:
+      ID = Intrinsic::riscv_paaddu_w;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 6b6bba51d5774..16dca60f67d07 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -140,10 +140,75 @@ __riscv_psra_hs(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_psra_hs_32(__x, __y);
 }
 
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_padd_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_padd_h_32(__x, __y);
+}
+
 static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_sadd(int32_t __x, int32_t __y) {
   return __builtin_riscv_sadd(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psadd_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psadd_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psadd_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psadd_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_aadd(int32_t __x, int32_t __y) {
+  return __builtin_riscv_aadd(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paadd_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_paadd_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paadd_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_paadd_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_saddu(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_saddu(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psaddu_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psaddu_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psaddu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psaddu_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_aaddu(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_aaddu(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paaddu_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_paaddu_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paaddu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_paaddu_h_32(__x, __y);
+}
 #endif
 
 
@@ -322,6 +387,81 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_psra_ws(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_psra_ws(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_padd_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_padd_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_padd_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psadd_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psadd_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psadd_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psadd_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psadd_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psadd_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paadd_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_paadd_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paadd_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_paadd_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paadd_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_paadd_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psaddu_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psaddu_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psaddu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psaddu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psaddu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psaddu_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paaddu_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_paaddu_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paaddu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_paaddu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paaddu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_paaddu_w(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index f8bed12aa8275..44780544f99a2 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -221,6 +221,24 @@ uint32_t psra_hs(uint32_t rs1, uint32_t rs2) {
   return __riscv_psra_hs(rs1, rs2);
 }
 
+// RV32P-LABEL: @padd_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.padd.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t padd_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_padd_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @padd_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.padd.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t padd_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_padd_h(rs1, rs2);
+}
+
 // RV32P-LABEL: @sadd(
 // RV32P-NEXT:  entry:
 // RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
@@ -229,3 +247,102 @@ uint32_t psra_hs(uint32_t rs1, uint32_t rs2) {
 int32_t sadd(int32_t rs1, int32_t rs2) {
   return __riscv_sadd(rs1, rs2);
 }
+
+// RV32P-LABEL: @psadd_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psadd.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psadd_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psadd_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @psadd_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psadd.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psadd_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psadd_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @aadd(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.aadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t aadd(int32_t rs1, int32_t rs2) {
+  return __riscv_aadd(rs1, rs2);
+}
+
+// RV32P-LABEL: @paadd_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.paadd.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t paadd_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_paadd_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @paadd_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.paadd.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t paadd_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_paadd_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @saddu(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.saddu.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t saddu(uint32_t rs1, uint32_t rs2) {
+  return __riscv_saddu(rs1, rs2);
+}
+
+// RV32P-LABEL: @psaddu_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psaddu.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psaddu_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psaddu_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @psaddu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psaddu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psaddu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psaddu_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @aaddu(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.aaddu.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t aaddu(uint32_t rs1, uint32_t rs2) {
+  return __riscv_aaddu(rs1, rs2);
+}
+
+// RV32P-LABEL: @paaddu_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.paaddu.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t paaddu_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_paaddu_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @paaddu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.paaddu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t paaddu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_paaddu_h(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index eb6cc455e4c6e..3e9df29b97b8e 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -336,3 +336,138 @@ uint64_t psra_hs(uint64_t rs1, uint64_t rs2) {
 uint64_t psra_ws(uint64_t rs1, uint64_t rs2) {
   return __riscv_psra_ws(rs1, rs2);
 }
+
+// RV64P-LABEL: @padd_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.padd.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t padd_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_padd_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @padd_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.padd.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t padd_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_padd_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @padd_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.padd.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t padd_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_padd_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @psadd_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psadd.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psadd_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psadd_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @psadd_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psadd.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psadd_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psadd_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @psadd_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psadd.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psadd_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psadd_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @paadd_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.paadd.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t paadd_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_paadd_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @paadd_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.paadd.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t paadd_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_paadd_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @paadd_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.paadd.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t paadd_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_paadd_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @psaddu_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psaddu.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psaddu_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psaddu_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @psaddu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psaddu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psaddu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psaddu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @psaddu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psaddu.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psaddu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psaddu_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @paaddu_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.paaddu.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t paaddu_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_paaddu_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @paaddu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.paaddu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t paaddu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_paaddu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @paaddu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.paaddu.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t paaddu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_paaddu_w(rs1, rs2);
+}

>From 89aaa6643323236bee7afae3bc3a93736d7983e3 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 23:08:59 +0800
Subject: [PATCH 29/40] [RISCV] Packed Subtraction and Saturating Subtraction C
 intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  29 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    |  96 ++++++++++++
 clang/lib/Headers/riscv_simd.h                | 145 ++++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 126 +++++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 135 ++++++++++++++++
 5 files changed, 531 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 579807bd121ba..b466078f13fdb 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -179,6 +179,20 @@ def psaddu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def aaddu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def paaddu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def paaddu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psub_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psub_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ssub           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pssub_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pssub_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def asub           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pasub_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pasub_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ssubu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pssubu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pssubu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def asubu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pasubu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pasubu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -232,6 +246,21 @@ def psaddu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def paaddu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def paaddu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def paaddu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psub_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psub_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psub_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssub_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssub_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssub_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasub_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasub_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasub_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssubu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssubu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssubu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasubu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasubu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasubu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index f66455f854b45..b4fadbbe78e5c 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -267,6 +267,35 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_paaddu_h_32:
   case RISCV::BI__builtin_riscv_paaddu_h_64:
   case RISCV::BI__builtin_riscv_paaddu_w:
+  case RISCV::BI__builtin_riscv_psub_b_32:
+  case RISCV::BI__builtin_riscv_psub_b_64:
+  case RISCV::BI__builtin_riscv_psub_h_32:
+  case RISCV::BI__builtin_riscv_psub_h_64:
+  case RISCV::BI__builtin_riscv_psub_w:
+  case RISCV::BI__builtin_riscv_ssub:
+  case RISCV::BI__builtin_riscv_pssub_b_32:
+  case RISCV::BI__builtin_riscv_pssub_b_64:
+  case RISCV::BI__builtin_riscv_pssub_h_32:
+  case RISCV::BI__builtin_riscv_pssub_h_64:
+  case RISCV::BI__builtin_riscv_pssub_w:
+  case RISCV::BI__builtin_riscv_asub:
+  case RISCV::BI__builtin_riscv_pasub_b_32:
+  case RISCV::BI__builtin_riscv_pasub_b_64:
+  case RISCV::BI__builtin_riscv_pasub_h_32:
+  case RISCV::BI__builtin_riscv_pasub_h_64:
+  case RISCV::BI__builtin_riscv_pasub_w:
+  case RISCV::BI__builtin_riscv_ssubu:
+  case RISCV::BI__builtin_riscv_pssubu_b_32:
+  case RISCV::BI__builtin_riscv_pssubu_b_64:
+  case RISCV::BI__builtin_riscv_pssubu_h_32:
+  case RISCV::BI__builtin_riscv_pssubu_h_64:
+  case RISCV::BI__builtin_riscv_pssubu_w:
+  case RISCV::BI__builtin_riscv_asubu:
+  case RISCV::BI__builtin_riscv_pasubu_b_32:
+  case RISCV::BI__builtin_riscv_pasubu_b_64:
+  case RISCV::BI__builtin_riscv_pasubu_h_32:
+  case RISCV::BI__builtin_riscv_pasubu_h_64:
+  case RISCV::BI__builtin_riscv_pasubu_w:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -443,6 +472,73 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_paaddu_w:
       ID = Intrinsic::riscv_paaddu_w;
       break;
+    case RISCV::BI__builtin_riscv_psub_b_32:
+    case RISCV::BI__builtin_riscv_psub_b_64:
+      ID = Intrinsic::riscv_psub_b;
+      break;
+    case RISCV::BI__builtin_riscv_psub_h_32:
+    case RISCV::BI__builtin_riscv_psub_h_64:
+      ID = Intrinsic::riscv_psub_h;
+      break;
+    case RISCV::BI__builtin_riscv_psub_w:
+      ID = Intrinsic::riscv_psub_w;
+      break;
+    case RISCV::BI__builtin_riscv_ssub:
+      ID = Intrinsic::riscv_ssub;
+      break;
+    case RISCV::BI__builtin_riscv_pssub_b_32:
+    case RISCV::BI__builtin_riscv_pssub_b_64:
+      ID = Intrinsic::riscv_pssub_b;
+      break;
+    case RISCV::BI__builtin_riscv_pssub_h_32:
+    case RISCV::BI__builtin_riscv_pssub_h_64:
+      ID = Intrinsic::riscv_pssub_h;
+      break;
+    case RISCV::BI__builtin_riscv_pssub_w:
+      ID = Intrinsic::riscv_pssub_w;
+      break;
+    case RISCV::BI__builtin_riscv_asub:
+      ID = Intrinsic::riscv_asub;
+      break;
+    case RISCV::BI__builtin_riscv_pasub_b_32:
+    case RISCV::BI__builtin_riscv_pasub_b_64:
+      ID = Intrinsic::riscv_pasub_b;
+      break;
+    case RISCV::BI__builtin_riscv_pasub_h_32:
+    case RISCV::BI__builtin_riscv_pasub_h_64:
+      ID = Intrinsic::riscv_pasub_h;
+      break;
+    case RISCV::BI__builtin_riscv_pasub_w:
+      ID = Intrinsic::riscv_pasub_w;
+      break;
+    case RISCV::BI__builtin_riscv_ssubu:
+      ID = Intrinsic::riscv_ssubu;
+      break;
+    case RISCV::BI__builtin_riscv_pssubu_b_32:
+    case RISCV::BI__builtin_riscv_pssubu_b_64:
+      ID = Intrinsic::riscv_pssubu_b;
+      break;
+    case RISCV::BI__builtin_riscv_pssubu_h_32:
+    case RISCV::BI__builtin_riscv_pssubu_h_64:
+      ID = Intrinsic::riscv_pssubu_h;
+      break;
+    case RISCV::BI__builtin_riscv_pssubu_w:
+      ID = Intrinsic::riscv_pssubu_w;
+      break;
+    case RISCV::BI__builtin_riscv_asubu:
+      ID = Intrinsic::riscv_asubu;
+      break;
+    case RISCV::BI__builtin_riscv_pasubu_b_32:
+    case RISCV::BI__builtin_riscv_pasubu_b_64:
+      ID = Intrinsic::riscv_pasubu_b;
+      break;
+    case RISCV::BI__builtin_riscv_pasubu_h_32:
+    case RISCV::BI__builtin_riscv_pasubu_h_64:
+      ID = Intrinsic::riscv_pasubu_h;
+      break;
+    case RISCV::BI__builtin_riscv_pasubu_w:
+      ID = Intrinsic::riscv_pasubu_w;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 16dca60f67d07..0bfaeb03b93ce 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -209,6 +209,76 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_paaddu_h(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_paaddu_h_32(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psub_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psub_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ssub(int32_t __x, int32_t __y) {
+  return __builtin_riscv_ssub(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssub_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pssub_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssub_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pssub_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_asub(int32_t __x, int32_t __y) {
+  return __builtin_riscv_asub(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasub_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pasub_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasub_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pasub_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ssubu(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_ssubu(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssubu_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pssubu_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssubu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pssubu_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_asubu(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_asubu(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasubu_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pasubu_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasubu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pasubu_h_32(__x, __y);
+}
 #endif
 
 
@@ -462,6 +532,81 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_paaddu_w(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_paaddu_w(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psub_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psub_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psub_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssub_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pssub_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssub_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pssub_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssub_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pssub_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasub_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pasub_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasub_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pasub_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasub_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pasub_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssubu_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pssubu_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssubu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pssubu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssubu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pssubu_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasubu_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pasubu_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasubu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pasubu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasubu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pasubu_w(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 44780544f99a2..473bac94aee15 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -346,3 +346,129 @@ uint32_t paaddu_b(uint32_t rs1, uint32_t rs2) {
 uint32_t paaddu_h(uint32_t rs1, uint32_t rs2) {
   return __riscv_paaddu_h(rs1, rs2);
 }
+
+// RV32P-LABEL: @psub_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psub.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psub_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psub_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @psub_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psub.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psub_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psub_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @ssub(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.ssub.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t ssub(int32_t rs1, int32_t rs2) {
+  return __riscv_ssub(rs1, rs2);
+}
+
+// RV32P-LABEL: @pssub_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pssub.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pssub_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pssub_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pssub_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pssub.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pssub_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pssub_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @asub(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.asub.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t asub(int32_t rs1, int32_t rs2) {
+  return __riscv_asub(rs1, rs2);
+}
+
+// RV32P-LABEL: @pasub_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pasub.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pasub_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pasub_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pasub_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pasub.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pasub_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pasub_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @ssubu(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.ssubu.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t ssubu(uint32_t rs1, uint32_t rs2) {
+  return __riscv_ssubu(rs1, rs2);
+}
+
+// RV32P-LABEL: @pssubu_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pssubu.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pssubu_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pssubu_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pssubu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pssubu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pssubu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pssubu_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @asubu(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.asubu.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t asubu(uint32_t rs1, uint32_t rs2) {
+  return __riscv_asubu(rs1, rs2);
+}
+
+// RV32P-LABEL: @pasubu_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pasubu.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pasubu_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pasubu_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pasubu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pasubu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pasubu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pasubu_h(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 3e9df29b97b8e..609ab6afe16ca 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -471,3 +471,138 @@ uint64_t paaddu_h(uint64_t rs1, uint64_t rs2) {
 uint64_t paaddu_w(uint64_t rs1, uint64_t rs2) {
   return __riscv_paaddu_w(rs1, rs2);
 }
+
+// RV64P-LABEL: @psub_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psub.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psub_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psub_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @psub_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psub.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psub_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psub_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @psub_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psub.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psub_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psub_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssub_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssub.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssub_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pssub_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssub_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssub.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssub_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pssub_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssub_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssub.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssub_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pssub_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pasub_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pasub.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pasub_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pasub_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pasub_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pasub.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pasub_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pasub_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pasub_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pasub.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pasub_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pasub_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssubu_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssubu.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssubu_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pssubu_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssubu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssubu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssubu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pssubu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssubu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssubu.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssubu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pssubu_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pasubu_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pasubu.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pasubu_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pasubu_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pasubu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pasubu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pasubu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pasubu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pasubu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pasubu.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pasubu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pasubu_w(rs1, rs2);
+}

>From 0dbc19e463f4f24f9e05c02fda427deb4f2ee967 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 4 Sep 2025 23:17:18 +0800
Subject: [PATCH 30/40] [RISCV] Packed Difference C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  8 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 24 +++++++++++
 clang/lib/Headers/riscv_simd.h                | 40 +++++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 36 +++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 36 +++++++++++++++++
 5 files changed, 144 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index b466078f13fdb..8529a7d5ce224 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -193,6 +193,10 @@ def pssubu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def asubu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pasubu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pasubu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pdif_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pdif_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pdifu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pdifu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -261,6 +265,10 @@ def pssubu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pasubu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pasubu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pasubu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pdif_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pdif_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pdifu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pdifu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index b4fadbbe78e5c..2096253b75d2f 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -296,6 +296,14 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pasubu_h_32:
   case RISCV::BI__builtin_riscv_pasubu_h_64:
   case RISCV::BI__builtin_riscv_pasubu_w:
+  case RISCV::BI__builtin_riscv_pdif_b_32:
+  case RISCV::BI__builtin_riscv_pdif_b_64:
+  case RISCV::BI__builtin_riscv_pdif_h_32:
+  case RISCV::BI__builtin_riscv_pdif_h_64:
+  case RISCV::BI__builtin_riscv_pdifu_b_32:
+  case RISCV::BI__builtin_riscv_pdifu_b_64:
+  case RISCV::BI__builtin_riscv_pdifu_h_32:
+  case RISCV::BI__builtin_riscv_pdifu_h_64:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -539,6 +547,22 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_pasubu_w:
       ID = Intrinsic::riscv_pasubu_w;
       break;
+    case RISCV::BI__builtin_riscv_pdif_b_32:
+    case RISCV::BI__builtin_riscv_pdif_b_64:
+      ID = Intrinsic::riscv_pdif_b;
+      break;
+    case RISCV::BI__builtin_riscv_pdif_h_32:
+    case RISCV::BI__builtin_riscv_pdif_h_64:
+      ID = Intrinsic::riscv_pdif_h;
+      break;
+    case RISCV::BI__builtin_riscv_pdifu_b_32:
+    case RISCV::BI__builtin_riscv_pdifu_b_64:
+      ID = Intrinsic::riscv_pdifu_b;
+      break;
+    case RISCV::BI__builtin_riscv_pdifu_h_32:
+    case RISCV::BI__builtin_riscv_pdifu_h_64:
+      ID = Intrinsic::riscv_pdifu_h;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 0bfaeb03b93ce..ab4b65e1928bb 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -279,6 +279,26 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pasubu_h(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_pasubu_h_32(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pdif_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pdif_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pdif_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pdif_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pdifu_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pdifu_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pdifu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pdifu_h_32(__x, __y);
+}
 #endif
 
 
@@ -607,6 +627,26 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pasubu_w(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_pasubu_w(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pdif_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pdif_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pdif_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pdif_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pdifu_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pdifu_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pdifu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pdifu_h_64(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 473bac94aee15..4941b48652bf0 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -472,3 +472,39 @@ uint32_t pasubu_b(uint32_t rs1, uint32_t rs2) {
 uint32_t pasubu_h(uint32_t rs1, uint32_t rs2) {
   return __riscv_pasubu_h(rs1, rs2);
 }
+
+// RV32P-LABEL: @pdif_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pdif.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pdif_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pdif_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pdif_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pdif.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pdif_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pdif_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pdifu_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pdifu.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pdifu_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pdifu_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pdifu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pdifu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pdifu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pdifu_h(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 609ab6afe16ca..e98332e196731 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -606,3 +606,39 @@ uint64_t pasubu_h(uint64_t rs1, uint64_t rs2) {
 uint64_t pasubu_w(uint64_t rs1, uint64_t rs2) {
   return __riscv_pasubu_w(rs1, rs2);
 }
+
+// RV64P-LABEL: @pdif_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pdif.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pdif_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pdif_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pdif_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pdif.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pdif_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pdif_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pdifu_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pdifu.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pdifu_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pdifu_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pdifu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pdifu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pdifu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pdifu_h(rs1, rs2);
+}

>From afbe7b51c716b16c3f8aeaf2f2493826398a8068 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 10:41:12 +0800
Subject: [PATCH 31/40] [RISCV] Packed Multiplication C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    | 10 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 44 +++++++++++++++
 clang/lib/Headers/riscv_simd.h                | 50 +++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 36 +++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 54 +++++++++++++++++++
 5 files changed, 194 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 8529a7d5ce224..62519a372ee52 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -197,6 +197,10 @@ def pdif_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pdif_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pdifu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pdifu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmul_h_b01_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulu_h_b01_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mul_h01        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulu_h01       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -269,6 +273,12 @@ def pdif_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pdif_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pdifu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pdifu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_h_b01_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_w_h01     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_h_b01_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_w_h01    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mul_w01        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulu_w01       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 2096253b75d2f..3e1b93dd6b59c 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -304,6 +304,10 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pdifu_b_64:
   case RISCV::BI__builtin_riscv_pdifu_h_32:
   case RISCV::BI__builtin_riscv_pdifu_h_64:
+  case RISCV::BI__builtin_riscv_mul_h01:
+  case RISCV::BI__builtin_riscv_mul_w01:
+  case RISCV::BI__builtin_riscv_mulu_h01:
+  case RISCV::BI__builtin_riscv_mulu_w01:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -563,6 +567,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_pdifu_h_64:
       ID = Intrinsic::riscv_pdifu_h;
       break;
+    case RISCV::BI__builtin_riscv_mul_h01:
+      ID = Intrinsic::riscv_mul_h01;
+      break;
+    case RISCV::BI__builtin_riscv_mul_w01:
+      ID = Intrinsic::riscv_mul_w01;
+      break;
+    case RISCV::BI__builtin_riscv_mulu_h01:
+      ID = Intrinsic::riscv_mulu_h01;
+      break;
+    case RISCV::BI__builtin_riscv_mulu_w01:
+      ID = Intrinsic::riscv_mulu_w01;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
@@ -673,6 +689,34 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     break;
   }
 
+
+  case RISCV::BI__builtin_riscv_pmul_h_b01_32:
+  case RISCV::BI__builtin_riscv_pmul_h_b01_64:
+  case RISCV::BI__builtin_riscv_pmul_w_h01:
+  case RISCV::BI__builtin_riscv_pmulu_h_b01_32:
+  case RISCV::BI__builtin_riscv_pmulu_h_b01_64:
+  case RISCV::BI__builtin_riscv_pmulu_w_h01: {
+    switch (BuiltinID) {
+    default: llvm_unreachable("unexpected builtin ID");
+    case RISCV::BI__builtin_riscv_pmul_h_b01_32:
+    case RISCV::BI__builtin_riscv_pmul_h_b01_64:
+      ID = Intrinsic::riscv_pmul_h_b01;
+      break;
+    case RISCV::BI__builtin_riscv_pmul_w_h01:
+      ID = Intrinsic::riscv_pmul_w_h01;
+      break;
+    case RISCV::BI__builtin_riscv_pmulu_h_b01_32:
+    case RISCV::BI__builtin_riscv_pmulu_h_b01_64:
+      ID = Intrinsic::riscv_pmulu_h_b01;
+      break;
+    case RISCV::BI__builtin_riscv_pmulu_w_h01:
+      ID = Intrinsic::riscv_pmulu_w_h01;
+      break;
+    }
+    IntrinsicTypes = {ResultType, Ops[0]->getType()};
+    break;
+  }
+
   // Zk builtins
 
   // Zknh
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index ab4b65e1928bb..262ebda2d02ea 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -299,6 +299,26 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pdifu_h(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_pdifu_h_32(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_h_b01(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmul_h_b01_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_h_b01(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulu_h_b01_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mul_h01(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mul_h01(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulu_h01(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulu_h01(__x, __y);
+}
 #endif
 
 
@@ -647,6 +667,36 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pdifu_h(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_pdifu_h_64(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_h_b01(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmul_h_b01_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_w_h01(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmul_w_h01(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_h_b01(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulu_h_b01_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_w_h01(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulu_w_h01(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mul_w01(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_mul_w01(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulu_w01(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_mulu_w01(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 4941b48652bf0..c1d3ac877fdbf 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -508,3 +508,39 @@ uint32_t pdifu_b(uint32_t rs1, uint32_t rs2) {
 uint32_t pdifu_h(uint32_t rs1, uint32_t rs2) {
   return __riscv_pdifu_h(rs1, rs2);
 }
+
+// RV32P-LABEL: @pmul_h_b01(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmul.h.b01.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmul_h_b01(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmul_h_b01(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulu_h_b01(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulu.h.b01.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulu_h_b01(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulu_h_b01(rs1, rs2);
+}
+
+// RV32P-LABEL: @mul_h01(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mul.h01.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mul_h01(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mul_h01(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulu_h01(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulu.h01.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulu_h01(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulu_h01(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index e98332e196731..e692baa72004e 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -642,3 +642,57 @@ uint64_t pdifu_b(uint64_t rs1, uint64_t rs2) {
 uint64_t pdifu_h(uint64_t rs1, uint64_t rs2) {
   return __riscv_pdifu_h(rs1, rs2);
 }
+
+// RV64P-LABEL: @pmul_h_b01(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmul.h.b01.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmul_h_b01(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmul_h_b01(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmul_w_h01(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmul.w.h01.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmul_w_h01(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmul_w_h01(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulu_h_b01(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulu.h.b01.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulu_h_b01(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulu_h_b01(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulu_w_h01(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulu.w.h01.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulu_w_h01(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulu_w_h01(rs1, rs2);
+}
+
+// RV64P-LABEL: @mul_w01(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.mul.w01.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t mul_w01(uint64_t rs1, uint64_t rs2) {
+  return __riscv_mul_w01(rs1, rs2);
+}
+
+// RV64P-LABEL: @mulu_w01(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.mulu.w01.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t mulu_w01(uint64_t rs1, uint64_t rs2) {
+  return __riscv_mulu_w01(rs1, rs2);
+}

>From a9e2728d22b8c6384919fdd3fca35d1e0812c6c5 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 10:49:58 +0800
Subject: [PATCH 32/40] [RISCV] Packed Shift Left and Shift Right C intrinsics
 w/o srx

---
 clang/include/clang/Basic/BuiltinsRISCV.td             |  2 ++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp             |  6 ++++++
 clang/lib/Headers/riscv_simd.h                         | 10 ++++++++++
 clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c |  9 +++++++++
 clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c |  9 +++++++++
 5 files changed, 36 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 62519a372ee52..3f4e818ca732a 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -201,6 +201,7 @@ def pmul_h_b01_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pmulu_h_b01_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def mul_h01        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def mulu_h01       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def slx_32         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -279,6 +280,7 @@ def pmulu_h_b01_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pmulu_w_h01    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def mul_w01        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def mulu_w01       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def slx_64         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 3e1b93dd6b59c..447ddf59573ab 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -308,6 +308,8 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_mul_w01:
   case RISCV::BI__builtin_riscv_mulu_h01:
   case RISCV::BI__builtin_riscv_mulu_w01:
+  case RISCV::BI__builtin_riscv_slx_32:
+  case RISCV::BI__builtin_riscv_slx_64:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -579,6 +581,10 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_mulu_w01:
       ID = Intrinsic::riscv_mulu_w01;
       break;
+    case RISCV::BI__builtin_riscv_slx_32:
+    case RISCV::BI__builtin_riscv_slx_64:
+      ID = Intrinsic::riscv_slx;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 262ebda2d02ea..19299883c01b6 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -319,6 +319,11 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_mulu_h01(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_mulu_h01(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_slx(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_slx_32(__x, __y);
+}
 #endif
 
 
@@ -697,6 +702,11 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_mulu_w01(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_mulu_w01(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_slx(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_slx_64(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index c1d3ac877fdbf..fba30760bc395 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -544,3 +544,12 @@ uint32_t mul_h01(uint32_t rs1, uint32_t rs2) {
 uint32_t mulu_h01(uint32_t rs1, uint32_t rs2) {
   return __riscv_mulu_h01(rs1, rs2);
 }
+
+// RV32P-LABEL: @slx(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.slx.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t slx(uint32_t rs1, uint32_t rs2) {
+  return __riscv_slx(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index e692baa72004e..b211272556632 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -696,3 +696,12 @@ uint64_t mul_w01(uint64_t rs1, uint64_t rs2) {
 uint64_t mulu_w01(uint64_t rs1, uint64_t rs2) {
   return __riscv_mulu_w01(rs1, rs2);
 }
+
+// RV64P-LABEL: @slx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.slx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t slx(uint64_t rs1, uint64_t rs2) {
+  return __riscv_slx(rs1, rs2);
+}

>From b83ae90fd90974e16dde39e82cd1deb67acd4fc0 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 11:05:58 +0800
Subject: [PATCH 33/40] [RISCV] Packed Shift-and-Add (SH1ADD) C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  7 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 24 +++++++++++++
 clang/lib/Headers/riscv_simd.h                | 35 ++++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 27 ++++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 36 +++++++++++++++++++
 5 files changed, 129 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 3f4e818ca732a..c964a77f4ee28 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -202,6 +202,9 @@ def pmulu_h_b01_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def mul_h01        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def mulu_h01       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def slx_32         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psh1add_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ssh1sadd       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pssh1sadd_h_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -281,6 +284,10 @@ def pmulu_w_h01    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def mul_w01        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def mulu_w01       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def slx_64         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psh1add_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psh1add_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssh1sadd_h_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssh1sadd_w    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 447ddf59573ab..79f60327b01ab 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -310,6 +310,13 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_mulu_w01:
   case RISCV::BI__builtin_riscv_slx_32:
   case RISCV::BI__builtin_riscv_slx_64:
+  case RISCV::BI__builtin_riscv_psh1add_h_32:
+  case RISCV::BI__builtin_riscv_psh1add_h_64:
+  case RISCV::BI__builtin_riscv_psh1add_w:
+  case RISCV::BI__builtin_riscv_ssh1sadd:
+  case RISCV::BI__builtin_riscv_pssh1sadd_h_32:
+  case RISCV::BI__builtin_riscv_pssh1sadd_h_64:
+  case RISCV::BI__builtin_riscv_pssh1sadd_w:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -585,6 +592,23 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_slx_64:
       ID = Intrinsic::riscv_slx;
       break;
+    case RISCV::BI__builtin_riscv_psh1add_h_32:
+    case RISCV::BI__builtin_riscv_psh1add_h_64:
+      ID = Intrinsic::riscv_psh1add_h;
+      break;
+    case RISCV::BI__builtin_riscv_psh1add_w:
+      ID = Intrinsic::riscv_psh1add_w;
+      break;
+    case RISCV::BI__builtin_riscv_ssh1sadd:
+      ID = Intrinsic::riscv_ssh1sadd;
+      break;
+    case RISCV::BI__builtin_riscv_pssh1sadd_h_32:
+    case RISCV::BI__builtin_riscv_pssh1sadd_h_64:
+      ID = Intrinsic::riscv_pssh1sadd_h;
+      break;
+    case RISCV::BI__builtin_riscv_pssh1sadd_w:
+      ID = Intrinsic::riscv_pssh1sadd_w;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 19299883c01b6..c393a888637b0 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -324,6 +324,21 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_slx(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_slx_32(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psh1add_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_psh1add_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ssh1sadd(int32_t __x, int32_t __y) {
+  return __builtin_riscv_ssh1sadd(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssh1sadd_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pssh1sadd_h_32(__x, __y);
+}
 #endif
 
 
@@ -707,6 +722,26 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_slx(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_slx_64(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psh1add_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psh1add_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psh1add_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_psh1add_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssh1sadd_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pssh1sadd_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssh1sadd_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pssh1sadd_w(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index fba30760bc395..a5632a77de204 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -553,3 +553,30 @@ uint32_t mulu_h01(uint32_t rs1, uint32_t rs2) {
 uint32_t slx(uint32_t rs1, uint32_t rs2) {
   return __riscv_slx(rs1, rs2);
 }
+
+// RV32P-LABEL: @psh1add_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psh1add.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psh1add_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_psh1add_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @ssh1sadd(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.ssh1sadd.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t ssh1sadd(int32_t rs1, int32_t rs2) {
+  return __riscv_ssh1sadd(rs1, rs2);
+}
+
+// RV32P-LABEL: @pssh1sadd_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pssh1sadd.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pssh1sadd_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pssh1sadd_h(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index b211272556632..29fdbdb299773 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -705,3 +705,39 @@ uint64_t mulu_w01(uint64_t rs1, uint64_t rs2) {
 uint64_t slx(uint64_t rs1, uint64_t rs2) {
   return __riscv_slx(rs1, rs2);
 }
+
+// RV64P-LABEL: @psh1add_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psh1add.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psh1add_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psh1add_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @psh1add_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psh1add.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psh1add_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_psh1add_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssh1sadd_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssh1sadd.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssh1sadd_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pssh1sadd_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssh1sadd_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssh1sadd.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssh1sadd_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pssh1sadd_w(rs1, rs2);
+}

>From 2e8915121aa288d94d7496420bf719cfa7d5b697 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 11:10:37 +0800
Subject: [PATCH 34/40] [RISCV] Packed Zip and Unzip C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  8 +++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 32 +++++++++
 clang/lib/Headers/riscv_simd.h                | 40 +++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 72 +++++++++++++++++++
 4 files changed, 152 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index c964a77f4ee28..4dff86e3080f2 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -288,6 +288,14 @@ def psh1add_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def psh1add_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pssh1sadd_h_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pssh1sadd_w    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def unzip8p        : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def unzip16p       : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def unzip8hp       : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def unzip16hp      : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def zip8p          : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def zip16p         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def zip8hp         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def zip16hp        : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 79f60327b01ab..77bcbdcb94bb8 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -317,6 +317,14 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pssh1sadd_h_32:
   case RISCV::BI__builtin_riscv_pssh1sadd_h_64:
   case RISCV::BI__builtin_riscv_pssh1sadd_w:
+  case RISCV::BI__builtin_riscv_unzip8p:
+  case RISCV::BI__builtin_riscv_unzip16p:
+  case RISCV::BI__builtin_riscv_unzip8hp:
+  case RISCV::BI__builtin_riscv_unzip16hp:
+  case RISCV::BI__builtin_riscv_zip8p:
+  case RISCV::BI__builtin_riscv_zip16p:
+  case RISCV::BI__builtin_riscv_zip8hp:
+  case RISCV::BI__builtin_riscv_zip16hp:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -609,6 +617,30 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_pssh1sadd_w:
       ID = Intrinsic::riscv_pssh1sadd_w;
       break;
+    case RISCV::BI__builtin_riscv_unzip8p:
+      ID = Intrinsic::riscv_unzip8p;
+      break;
+    case RISCV::BI__builtin_riscv_unzip16p:
+      ID = Intrinsic::riscv_unzip16p;
+      break;
+    case RISCV::BI__builtin_riscv_unzip8hp:
+      ID = Intrinsic::riscv_unzip8hp;
+      break;
+    case RISCV::BI__builtin_riscv_unzip16hp:
+      ID = Intrinsic::riscv_unzip16hp;
+      break;
+    case RISCV::BI__builtin_riscv_zip8p:
+      ID = Intrinsic::riscv_zip8p;
+      break;
+    case RISCV::BI__builtin_riscv_zip16p:
+      ID = Intrinsic::riscv_zip16p;
+      break;
+    case RISCV::BI__builtin_riscv_zip8hp:
+      ID = Intrinsic::riscv_zip8hp;
+      break;
+    case RISCV::BI__builtin_riscv_zip16hp:
+      ID = Intrinsic::riscv_zip16hp;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index c393a888637b0..aba7554fb2a4b 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -742,6 +742,46 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pssh1sadd_w(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_pssh1sadd_w(__x, __y);
 }
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_unzip8p(int64_t __x, int64_t __y) {
+  return __builtin_riscv_unzip8p(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_unzip16p(int64_t __x, int64_t __y) {
+  return __builtin_riscv_unzip16p(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_unzip8hp(int64_t __x, int64_t __y) {
+  return __builtin_riscv_unzip8hp(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_unzip16hp(int64_t __x, int64_t __y) {
+  return __builtin_riscv_unzip16hp(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_zip8p(int64_t __x, int64_t __y) {
+  return __builtin_riscv_zip8p(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_zip16p(int64_t __x, int64_t __y) {
+  return __builtin_riscv_zip16p(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_zip8hp(int64_t __x, int64_t __y) {
+  return __builtin_riscv_zip8hp(__x, __y);
+}
+
+static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_zip16hp(int64_t __x, int64_t __y) {
+  return __builtin_riscv_zip16hp(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 29fdbdb299773..13cf5a8d9c7ce 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -741,3 +741,75 @@ uint64_t pssh1sadd_h(uint64_t rs1, uint64_t rs2) {
 uint64_t pssh1sadd_w(uint64_t rs1, uint64_t rs2) {
   return __riscv_pssh1sadd_w(rs1, rs2);
 }
+
+// RV64P-LABEL: @unzip8p(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.unzip8p.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t unzip8p(int64_t rs1, int64_t rs2) {
+  return __riscv_unzip8p(rs1, rs2);
+}
+
+// RV64P-LABEL: @unzip16p(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.unzip16p.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t unzip16p(int64_t rs1, int64_t rs2) {
+  return __riscv_unzip16p(rs1, rs2);
+}
+
+// RV64P-LABEL: @unzip8hp(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.unzip8hp.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t unzip8hp(int64_t rs1, int64_t rs2) {
+  return __riscv_unzip8hp(rs1, rs2);
+}
+
+// RV64P-LABEL: @unzip16hp(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.unzip16hp.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t unzip16hp(int64_t rs1, int64_t rs2) {
+  return __riscv_unzip16hp(rs1, rs2);
+}
+
+// RV64P-LABEL: @zip8p(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.zip8p.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t zip8p(int64_t rs1, int64_t rs2) {
+  return __riscv_zip8p(rs1, rs2);
+}
+
+// RV64P-LABEL: @zip16p(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.zip16p.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t zip16p(int64_t rs1, int64_t rs2) {
+  return __riscv_zip16p(rs1, rs2);
+}
+
+// RV64P-LABEL: @zip8hp(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.zip8hp.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t zip8hp(int64_t rs1, int64_t rs2) {
+  return __riscv_zip8hp(rs1, rs2);
+}
+
+// RV64P-LABEL: @zip16hp(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.zip16hp.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+int64_t zip16hp(int64_t rs1, int64_t rs2) {
+  return __riscv_zip16hp(rs1, rs2);
+}

>From 1f3af5b9faa2f95b108a1c47c0b52c7b84aa19e5 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 11:20:20 +0800
Subject: [PATCH 35/40] [RISCV] Packed Multiply C intrinsics (Lane Variants 00
 and 11)

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  30 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 115 +++++++++++++
 clang/lib/Headers/riscv_simd.h                | 150 ++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 108 ++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 162 ++++++++++++++++++
 5 files changed, 565 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 4dff86e3080f2..2119beeebb0e7 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -205,6 +205,18 @@ def slx_32         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def psh1add_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def ssh1sadd       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 def pssh1sadd_h_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmul_h_b00_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmul_h_b11_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulu_h_b00_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulu_h_b11_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulsu_h_b00_32: RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulsu_h_b11_32: RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mul_h00        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mul_h11        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulu_h00       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulu_h11       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulsu_h00      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulsu_h11      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -296,6 +308,24 @@ def zip8p          : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
 def zip16p         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
 def zip8hp         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
 def zip16hp        : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def pmul_h_b00_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_w_h00     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_h_b11_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_w_h11     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_h_b00_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_w_h00    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_h_b11_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_w_h11    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulsu_h_b00_64: RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulsu_w_h00   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulsu_h_b11_64: RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulsu_w_h11   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mul_w00        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mul_w11        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulu_w00       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulu_w11       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulsu_w00      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulsu_w11      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 77bcbdcb94bb8..3d4d6ddd6bd0c 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -757,6 +757,26 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pmul_w_h01:
   case RISCV::BI__builtin_riscv_pmulu_h_b01_32:
   case RISCV::BI__builtin_riscv_pmulu_h_b01_64:
+  case RISCV::BI__builtin_riscv_pmul_h_b00_32:
+  case RISCV::BI__builtin_riscv_pmul_h_b00_64:
+  case RISCV::BI__builtin_riscv_pmul_w_h00:
+  case RISCV::BI__builtin_riscv_pmul_h_b11_32:
+  case RISCV::BI__builtin_riscv_pmul_h_b11_64:
+  case RISCV::BI__builtin_riscv_pmul_w_h11:
+  case RISCV::BI__builtin_riscv_pmulu_h_b00_32:
+  case RISCV::BI__builtin_riscv_pmulu_h_b00_64:
+  case RISCV::BI__builtin_riscv_pmulu_w_h00:
+  case RISCV::BI__builtin_riscv_pmulu_h_b11_32:
+  case RISCV::BI__builtin_riscv_pmulu_h_b11_64:
+  case RISCV::BI__builtin_riscv_pmulu_w_h11:
+  case RISCV::BI__builtin_riscv_mul_h00:
+  case RISCV::BI__builtin_riscv_mul_w00:
+  case RISCV::BI__builtin_riscv_mul_h11:
+  case RISCV::BI__builtin_riscv_mul_w11:
+  case RISCV::BI__builtin_riscv_mulu_h00:
+  case RISCV::BI__builtin_riscv_mulu_w00:
+  case RISCV::BI__builtin_riscv_mulu_h11:
+  case RISCV::BI__builtin_riscv_mulu_w11:
   case RISCV::BI__builtin_riscv_pmulu_w_h01: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -774,11 +794,106 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_pmulu_w_h01:
       ID = Intrinsic::riscv_pmulu_w_h01;
       break;
+    case RISCV::BI__builtin_riscv_pmul_h_b00_32:
+    case RISCV::BI__builtin_riscv_pmul_h_b00_64:
+      ID = Intrinsic::riscv_pmul_h_b00;
+      break;
+    case RISCV::BI__builtin_riscv_pmul_w_h00:
+      ID = Intrinsic::riscv_pmul_w_h00;
+      break;
+    case RISCV::BI__builtin_riscv_pmul_h_b11_32:
+    case RISCV::BI__builtin_riscv_pmul_h_b11_64:
+      ID = Intrinsic::riscv_pmul_h_b11;
+      break;
+    case RISCV::BI__builtin_riscv_pmul_w_h11:
+      ID = Intrinsic::riscv_pmul_w_h11;
+      break;
+    case RISCV::BI__builtin_riscv_pmulu_h_b00_32:
+    case RISCV::BI__builtin_riscv_pmulu_h_b00_64:
+      ID = Intrinsic::riscv_pmulu_h_b00;
+      break;
+    case RISCV::BI__builtin_riscv_pmulu_w_h00:
+      ID = Intrinsic::riscv_pmulu_w_h00;
+      break;
+    case RISCV::BI__builtin_riscv_pmulu_h_b11_32:
+    case RISCV::BI__builtin_riscv_pmulu_h_b11_64:
+      ID = Intrinsic::riscv_pmulu_h_b11;
+      break;
+    case RISCV::BI__builtin_riscv_pmulu_w_h11:
+      ID = Intrinsic::riscv_pmulu_w_h11;
+      break;
+    case RISCV::BI__builtin_riscv_mul_h00:
+      ID = Intrinsic::riscv_mul_h00;
+      break;
+    case RISCV::BI__builtin_riscv_mul_w00:
+      ID = Intrinsic::riscv_mul_w00;
+      break;
+    case RISCV::BI__builtin_riscv_mul_h11:
+      ID = Intrinsic::riscv_mul_h11;
+      break;
+    case RISCV::BI__builtin_riscv_mul_w11:
+      ID = Intrinsic::riscv_mul_w11;
+      break;
+    case RISCV::BI__builtin_riscv_mulu_h00:
+      ID = Intrinsic::riscv_mulu_h00;
+      break;
+    case RISCV::BI__builtin_riscv_mulu_w00:
+      ID = Intrinsic::riscv_mulu_w00;
+      break;
+    case RISCV::BI__builtin_riscv_mulu_h11:
+      ID = Intrinsic::riscv_mulu_h11;
+      break;
+    case RISCV::BI__builtin_riscv_mulu_w11:
+      ID = Intrinsic::riscv_mulu_w11;
+      break;
     }
     IntrinsicTypes = {ResultType, Ops[0]->getType()};
     break;
   }
 
+  case RISCV::BI__builtin_riscv_pmulsu_h_b00_32:
+  case RISCV::BI__builtin_riscv_pmulsu_h_b00_64:
+  case RISCV::BI__builtin_riscv_pmulsu_w_h00:
+  case RISCV::BI__builtin_riscv_pmulsu_h_b11_32:
+  case RISCV::BI__builtin_riscv_pmulsu_h_b11_64:
+  case RISCV::BI__builtin_riscv_pmulsu_w_h11:
+  case RISCV::BI__builtin_riscv_mulsu_h00:
+  case RISCV::BI__builtin_riscv_mulsu_w00:
+  case RISCV::BI__builtin_riscv_mulsu_h11:
+  case RISCV::BI__builtin_riscv_mulsu_w11: {
+    switch (BuiltinID) {
+    default: llvm_unreachable("unexpected builtin ID");
+    case RISCV::BI__builtin_riscv_pmulsu_h_b00_32:
+    case RISCV::BI__builtin_riscv_pmulsu_h_b00_64:
+      ID = Intrinsic::riscv_pmulsu_h_b00;
+      break;
+    case RISCV::BI__builtin_riscv_pmulsu_w_h00:
+      ID = Intrinsic::riscv_pmulsu_w_h00;
+      break;
+    case RISCV::BI__builtin_riscv_pmulsu_h_b11_32:
+    case RISCV::BI__builtin_riscv_pmulsu_h_b11_64:
+      ID = Intrinsic::riscv_pmulsu_h_b11;
+      break;
+    case RISCV::BI__builtin_riscv_pmulsu_w_h11:
+      ID = Intrinsic::riscv_pmulsu_w_h11;
+      break;
+    case RISCV::BI__builtin_riscv_mulsu_h00:
+      ID = Intrinsic::riscv_mulsu_h00;
+      break;
+    case RISCV::BI__builtin_riscv_mulsu_w00:
+      ID = Intrinsic::riscv_mulsu_w00;
+      break;
+    case RISCV::BI__builtin_riscv_mulsu_h11:
+      ID = Intrinsic::riscv_mulsu_h11;
+      break;
+    case RISCV::BI__builtin_riscv_mulsu_w11:
+      ID = Intrinsic::riscv_mulsu_w11;
+      break;
+    }
+    IntrinsicTypes = {ResultType, Ops[0]->getType(), Ops[1]->getType()};
+    break;
+  }
+
   // Zk builtins
 
   // Zknh
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index aba7554fb2a4b..d3cc29db3c87a 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -339,6 +339,66 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pssh1sadd_h(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_pssh1sadd_h_32(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_h_b00(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmul_h_b00_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_h_b11(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmul_h_b11_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_h_b00(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulu_h_b00_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_h_b11(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulu_h_b11_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulsu_h_b00(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulsu_h_b00_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulsu_h_b11(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulsu_h_b11_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mul_h00(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mul_h00(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mul_h11(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mul_h11(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulu_h00(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulu_h00(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulu_h11(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulu_h11(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulsu_h00(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulsu_h00(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulsu_h11(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulsu_h11(__x, __y);
+}
 #endif
 
 
@@ -782,6 +842,96 @@ static __inline__ int64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_zip16hp(int64_t __x, int64_t __y) {
   return __builtin_riscv_zip16hp(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_h_b00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmul_h_b00_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_w_h00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmul_w_h00(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_h_b11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmul_h_b11_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmul_w_h11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmul_w_h11(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_h_b00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulu_h_b00_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_w_h00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulu_w_h00(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_h_b11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulu_h_b11_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulu_w_h11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulu_w_h11(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulsu_h_b00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulsu_h_b00_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulsu_w_h00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulsu_w_h00(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulsu_h_b11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulsu_h_b11_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulsu_w_h11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulsu_w_h11(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mul_w00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_mul_w00(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mul_w11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_mul_w11(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulu_w00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_mulu_w00(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulu_w11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_mulu_w11(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulsu_w00(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_mulsu_w00(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulsu_w11(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_mulsu_w11(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index a5632a77de204..c75416b2d8b1d 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -580,3 +580,111 @@ int32_t ssh1sadd(int32_t rs1, int32_t rs2) {
 uint32_t pssh1sadd_h(uint32_t rs1, uint32_t rs2) {
   return __riscv_pssh1sadd_h(rs1, rs2);
 }
+
+// RV32P-LABEL: @pmul_h_b00(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmul.h.b00.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmul_h_b00(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmul_h_b00(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmul_h_b11(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmul.h.b11.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmul_h_b11(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmul_h_b11(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulu_h_b00(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulu.h.b00.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulu_h_b00(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulu_h_b00(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulu_h_b11(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulu.h.b11.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulu_h_b11(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulu_h_b11(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulsu_h_b00(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulsu.h.b00.i32.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulsu_h_b00(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulsu_h_b00(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulsu_h_b11(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulsu.h.b11.i32.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulsu_h_b11(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulsu_h_b11(rs1, rs2);
+}
+
+// RV32P-LABEL: @mul_h00(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mul.h00.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mul_h00(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mul_h00(rs1, rs2);
+}
+
+// RV32P-LABEL: @mul_h11(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mul.h11.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mul_h11(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mul_h11(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulu_h00(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulu.h00.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulu_h00(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulu_h00(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulu_h11(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulu.h11.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulu_h11(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulu_h11(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulsu_h00(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulsu.h00.i32.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulsu_h00(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulsu_h00(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulsu_h11(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulsu.h11.i32.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulsu_h11(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulsu_h11(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 13cf5a8d9c7ce..9693d4ee2546e 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -813,3 +813,165 @@ int64_t zip8hp(int64_t rs1, int64_t rs2) {
 int64_t zip16hp(int64_t rs1, int64_t rs2) {
   return __riscv_zip16hp(rs1, rs2);
 }
+
+// RV64P-LABEL: @pmul_h_b00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmul.h.b00.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmul_h_b00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmul_h_b00(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmul_w_h00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmul.w.h00.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmul_w_h00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmul_w_h00(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmul_h_b11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmul.h.b11.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmul_h_b11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmul_h_b11(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmul_w_h11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmul.w.h11.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmul_w_h11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmul_w_h11(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulu_h_b00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulu.h.b00.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulu_h_b00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulu_h_b00(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulu_w_h00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulu.w.h00.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulu_w_h00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulu_w_h00(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulu_h_b11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulu.h.b11.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulu_h_b11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulu_h_b11(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulu_w_h11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulu.w.h11.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulu_w_h11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulu_w_h11(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulsu_h_b00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulsu.h.b00.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulsu_h_b00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulsu_h_b00(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulsu_w_h00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulsu.w.h00.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulsu_w_h00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulsu_w_h00(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulsu_h_b11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulsu.h.b11.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulsu_h_b11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulsu_h_b11(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulsu_w_h11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulsu.w.h11.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulsu_w_h11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulsu_w_h11(rs1, rs2);
+}
+
+// RV64P-LABEL: @mul_w00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.mul.w00.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t mul_w00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_mul_w00(rs1, rs2);
+}
+
+// RV64P-LABEL: @mul_w11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.mul.w11.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t mul_w11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_mul_w11(rs1, rs2);
+}
+
+// RV64P-LABEL: @mulu_w00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.mulu.w00.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t mulu_w00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_mulu_w00(rs1, rs2);
+}
+
+// RV64P-LABEL: @mulu_w11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.mulu.w11.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t mulu_w11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_mulu_w11(rs1, rs2);
+}
+
+// RV64P-LABEL: @mulsu_w00(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.mulsu.w00.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t mulsu_w00(uint64_t rs1, uint64_t rs2) {
+  return __riscv_mulsu_w00(rs1, rs2);
+}
+
+// RV64P-LABEL: @mulsu_w11(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.mulsu.w11.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t mulsu_w11(uint64_t rs1, uint64_t rs2) {
+  return __riscv_mulsu_w11(rs1, rs2);
+}

>From e4891ea649710fe85003f392539f78d409711478 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 11:24:39 +0800
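Subject: [PATCH 36/40] [RISCV] Packed and Reordered Pack C intrinsics

Add the Clang builtins, riscv_simd.h wrappers and CodeGen tests for the
pack intrinsics: ppack.h, ppackbt.h, ppacktb.h, ppackt.h and
packbt/packtb/packt on RV32 and RV64, plus ppack.w, ppackbt.w,
ppacktb.w and ppackt.w on RV64 only.

The new builtins are handled by the existing case group that ends with
__builtin_riscv_sadd. Note that packbt/packtb/packt are declared with
int32_t operands on RV32 but uint64_t operands on RV64.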
---
 clang/include/clang/Basic/BuiltinsRISCV.td    | 18 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 58 +++++++++++
 clang/lib/Headers/riscv_simd.h                | 90 +++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 63 ++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 99 +++++++++++++++++++
 5 files changed, 328 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 2119beeebb0e7..6e588b2df13c2 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -217,6 +217,13 @@ def mulu_h00       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def mulu_h11       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def mulsu_h00      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def mulsu_h11      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ppack_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ppackbt_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def packbt_32      : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def ppacktb_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def packtb_32      : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def ppackt_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def packt_32       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -326,6 +333,17 @@ def mulu_w00       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def mulu_w11       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def mulsu_w00      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def mulsu_w11      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppack_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppack_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppackbt_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppackbt_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def packbt_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppacktb_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppacktb_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def packtb_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppackt_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppackt_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def packt_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 3d4d6ddd6bd0c..196faf524b891 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -325,6 +325,24 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_zip16p:
   case RISCV::BI__builtin_riscv_zip8hp:
   case RISCV::BI__builtin_riscv_zip16hp:
+  case RISCV::BI__builtin_riscv_ppack_h_32:
+  case RISCV::BI__builtin_riscv_ppack_h_64:
+  case RISCV::BI__builtin_riscv_ppack_w:
+  case RISCV::BI__builtin_riscv_ppackbt_h_32:
+  case RISCV::BI__builtin_riscv_ppackbt_h_64:
+  case RISCV::BI__builtin_riscv_ppackbt_w:
+  case RISCV::BI__builtin_riscv_packbt_32:
+  case RISCV::BI__builtin_riscv_packbt_64:
+  case RISCV::BI__builtin_riscv_ppacktb_h_32:
+  case RISCV::BI__builtin_riscv_ppacktb_h_64:
+  case RISCV::BI__builtin_riscv_ppacktb_w:
+  case RISCV::BI__builtin_riscv_packtb_32:
+  case RISCV::BI__builtin_riscv_packtb_64:
+  case RISCV::BI__builtin_riscv_ppackt_h_32:
+  case RISCV::BI__builtin_riscv_ppackt_h_64:
+  case RISCV::BI__builtin_riscv_ppackt_w:
+  case RISCV::BI__builtin_riscv_packt_32:
+  case RISCV::BI__builtin_riscv_packt_64:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -641,6 +659,46 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_zip16hp:
       ID = Intrinsic::riscv_zip16hp;
       break;
+    case RISCV::BI__builtin_riscv_ppack_h_32:
+    case RISCV::BI__builtin_riscv_ppack_h_64:
+      ID = Intrinsic::riscv_ppack_h;
+      break;
+    case RISCV::BI__builtin_riscv_ppack_w:
+      ID = Intrinsic::riscv_ppack_w;
+      break;
+    case RISCV::BI__builtin_riscv_ppackbt_h_32:
+    case RISCV::BI__builtin_riscv_ppackbt_h_64:
+      ID = Intrinsic::riscv_ppackbt_h;
+      break;
+    case RISCV::BI__builtin_riscv_ppackbt_w:
+      ID = Intrinsic::riscv_ppackbt_w;
+      break;
+    case RISCV::BI__builtin_riscv_packbt_32:
+    case RISCV::BI__builtin_riscv_packbt_64:
+      ID = Intrinsic::riscv_packbt;
+      break;
+    case RISCV::BI__builtin_riscv_ppacktb_h_32:
+    case RISCV::BI__builtin_riscv_ppacktb_h_64:
+      ID = Intrinsic::riscv_ppacktb_h;
+      break;
+    case RISCV::BI__builtin_riscv_ppacktb_w:
+      ID = Intrinsic::riscv_ppacktb_w;
+      break;
+    case RISCV::BI__builtin_riscv_packtb_32:
+    case RISCV::BI__builtin_riscv_packtb_64:
+      ID = Intrinsic::riscv_packtb;
+      break;
+    case RISCV::BI__builtin_riscv_ppackt_h_32:
+    case RISCV::BI__builtin_riscv_ppackt_h_64:
+      ID = Intrinsic::riscv_ppackt_h;
+      break;
+    case RISCV::BI__builtin_riscv_ppackt_w:
+      ID = Intrinsic::riscv_ppackt_w;
+      break;
+    case RISCV::BI__builtin_riscv_packt_32:
+    case RISCV::BI__builtin_riscv_packt_64:
+      ID = Intrinsic::riscv_packt;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index d3cc29db3c87a..f15b8627fdd63 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -399,6 +399,41 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_mulsu_h11(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_mulsu_h11(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppack_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_ppack_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppackbt_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_ppackbt_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_packbt(int32_t __x, int32_t __y) {
+  return __builtin_riscv_packbt_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppacktb_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_ppacktb_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_packtb(int32_t __x, int32_t __y) {
+  return __builtin_riscv_packtb_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppackt_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_ppackt_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_packt(int32_t __x, int32_t __y) {
+  return __builtin_riscv_packt_32(__x, __y);
+}
 #endif
 
 
@@ -932,6 +967,61 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_mulsu_w11(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_mulsu_w11(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppack_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_ppack_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppack_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_ppack_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppackbt_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_ppackbt_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppackbt_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_ppackbt_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_packbt(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_packbt_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppacktb_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_ppacktb_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppacktb_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_ppacktb_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_packtb(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_packtb_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppackt_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_ppackt_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_ppackt_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_ppackt_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_packt(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_packt_64(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index c75416b2d8b1d..8754fc8de50f9 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -688,3 +688,66 @@ uint32_t mulsu_h00(uint32_t rs1, uint32_t rs2) {
 uint32_t mulsu_h11(uint32_t rs1, uint32_t rs2) {
   return __riscv_mulsu_h11(rs1, rs2);
 }
+
+// RV32P-LABEL: @ppack_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.ppack.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t ppack_h(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.ppack.h.i32 call, then ret
+  return __riscv_ppack_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @ppackbt_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.ppackbt.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t ppackbt_h(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.ppackbt.h.i32 call, then ret
+  return __riscv_ppackbt_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @packbt(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.packbt.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t packbt(int32_t rs1, int32_t rs2) { // signed i32 here, while the RV64 packbt test uses uint64_t
+  return __riscv_packbt(rs1, rs2); // RV32P CHECKs: single @llvm.riscv.packbt.i32 call, then ret
+}
+
+// RV32P-LABEL: @ppacktb_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.ppacktb.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t ppacktb_h(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.ppacktb.h.i32 call, then ret
+  return __riscv_ppacktb_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @packtb(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.packtb.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t packtb(int32_t rs1, int32_t rs2) { // signed i32 here, while the RV64 packtb test uses uint64_t
+  return __riscv_packtb(rs1, rs2); // RV32P CHECKs: single @llvm.riscv.packtb.i32 call, then ret
+}
+
+// RV32P-LABEL: @ppackt_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.ppackt.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t ppackt_h(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.ppackt.h.i32 call, then ret
+  return __riscv_ppackt_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @packt(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.packt.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t packt(int32_t rs1, int32_t rs2) {
+  return __riscv_packt(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 9693d4ee2546e..56bcdaabd1c3b 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -975,3 +975,102 @@ uint64_t mulsu_w00(uint64_t rs1, uint64_t rs2) {
 uint64_t mulsu_w11(uint64_t rs1, uint64_t rs2) {
   return __riscv_mulsu_w11(rs1, rs2);
 }
+
+// RV64P-LABEL: @ppack_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.ppack.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t ppack_h(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.ppack.h.i64 call, then ret
+  return __riscv_ppack_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @ppack_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.ppack.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t ppack_w(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.ppack.w.i64 call, then ret
+  return __riscv_ppack_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @ppackbt_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.ppackbt.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t ppackbt_h(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.ppackbt.h.i64 call, then ret
+  return __riscv_ppackbt_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @ppackbt_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.ppackbt.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t ppackbt_w(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.ppackbt.w.i64 call, then ret
+  return __riscv_ppackbt_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @packbt(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.packbt.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t packbt(uint64_t rs1, uint64_t rs2) { // unsigned i64 here, while the RV32 packbt test uses int32_t
+  return __riscv_packbt(rs1, rs2); // RV64P CHECKs: single @llvm.riscv.packbt.i64 call, then ret
+}
+
+// RV64P-LABEL: @ppacktb_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.ppacktb.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t ppacktb_h(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.ppacktb.h.i64 call, then ret
+  return __riscv_ppacktb_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @ppacktb_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.ppacktb.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t ppacktb_w(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.ppacktb.w.i64 call, then ret
+  return __riscv_ppacktb_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @packtb(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.packtb.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t packtb(uint64_t rs1, uint64_t rs2) { // unsigned i64 here, while the RV32 packtb test uses int32_t
+  return __riscv_packtb(rs1, rs2); // RV64P CHECKs: single @llvm.riscv.packtb.i64 call, then ret
+}
+
+// RV64P-LABEL: @ppackt_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.ppackt.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t ppackt_h(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.ppackt.h.i64 call, then ret
+  return __riscv_ppackt_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @ppackt_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.ppackt.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t ppackt_w(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.ppackt.w.i64 call, then ret
+  return __riscv_ppackt_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @packt(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.packt.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t packt(uint64_t rs1, uint64_t rs2) {
+  return __riscv_packt(rs1, rs2);
+}

>From 9a52614bad547452372ff7d17bf75fa41e4ec5bf Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 11:28:52 +0800
Subject: [PATCH 37/40] [RISCV] Cross-Lane Packed Add/Sub C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  18 +++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    |  60 ++++++++++
 clang/lib/Headers/riscv_simd.h                |  90 +++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       |  54 +++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 108 ++++++++++++++++++
 5 files changed, 330 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 6e588b2df13c2..fe184a2aae107 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -224,6 +224,12 @@ def ppacktb_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def packtb_32      : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
 def ppackt_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def packt_32       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pas_hx_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psa_hx_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psas_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pssa_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def paas_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pasa_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -344,6 +350,18 @@ def packtb_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def ppackt_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def ppackt_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def packt_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pas_hx_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pas_wx         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psa_hx_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psa_wx         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psas_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psas_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssa_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssa_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paas_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paas_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasa_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasa_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 196faf524b891..283f945a67c2c 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -343,6 +343,24 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_ppackt_w:
   case RISCV::BI__builtin_riscv_packt_32:
   case RISCV::BI__builtin_riscv_packt_64:
+  case RISCV::BI__builtin_riscv_pas_hx_32:
+  case RISCV::BI__builtin_riscv_pas_hx_64:
+  case RISCV::BI__builtin_riscv_pas_wx:
+  case RISCV::BI__builtin_riscv_psa_hx_32:
+  case RISCV::BI__builtin_riscv_psa_hx_64:
+  case RISCV::BI__builtin_riscv_psa_wx:
+  case RISCV::BI__builtin_riscv_psas_hx_32:
+  case RISCV::BI__builtin_riscv_psas_hx_64:
+  case RISCV::BI__builtin_riscv_psas_wx:
+  case RISCV::BI__builtin_riscv_pssa_hx_32:
+  case RISCV::BI__builtin_riscv_pssa_hx_64:
+  case RISCV::BI__builtin_riscv_pssa_wx:
+  case RISCV::BI__builtin_riscv_paas_hx_32:
+  case RISCV::BI__builtin_riscv_paas_hx_64:
+  case RISCV::BI__builtin_riscv_paas_wx:
+  case RISCV::BI__builtin_riscv_pasa_hx_32:
+  case RISCV::BI__builtin_riscv_pasa_hx_64:
+  case RISCV::BI__builtin_riscv_pasa_wx:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -699,6 +717,48 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_packt_64:
       ID = Intrinsic::riscv_packt;
       break;
+    case RISCV::BI__builtin_riscv_pas_hx_32:
+    case RISCV::BI__builtin_riscv_pas_hx_64:
+      ID = Intrinsic::riscv_pas_hx;
+      break;
+    case RISCV::BI__builtin_riscv_pas_wx:
+      ID = Intrinsic::riscv_pas_wx;
+      break;
+    case RISCV::BI__builtin_riscv_psa_hx_32:
+    case RISCV::BI__builtin_riscv_psa_hx_64:
+      ID = Intrinsic::riscv_psa_hx;
+      break;
+    case RISCV::BI__builtin_riscv_psa_wx:
+      ID = Intrinsic::riscv_psa_wx;
+      break;
+    case RISCV::BI__builtin_riscv_psas_hx_32:
+    case RISCV::BI__builtin_riscv_psas_hx_64:
+      ID = Intrinsic::riscv_psas_hx;
+      break;
+    case RISCV::BI__builtin_riscv_psas_wx:
+      ID = Intrinsic::riscv_psas_wx;
+      break;
+    case RISCV::BI__builtin_riscv_pssa_hx_32:
+    case RISCV::BI__builtin_riscv_pssa_hx_64:
+      ID = Intrinsic::riscv_pssa_hx;
+      break;
+    case RISCV::BI__builtin_riscv_pssa_wx:
+      ID = Intrinsic::riscv_pssa_wx;
+      break;
+    case RISCV::BI__builtin_riscv_paas_hx_32:
+    case RISCV::BI__builtin_riscv_paas_hx_64:
+      ID = Intrinsic::riscv_paas_hx;
+      break;
+    case RISCV::BI__builtin_riscv_paas_wx:
+      ID = Intrinsic::riscv_paas_wx;
+      break;
+    case RISCV::BI__builtin_riscv_pasa_hx_32:
+    case RISCV::BI__builtin_riscv_pasa_hx_64:
+      ID = Intrinsic::riscv_pasa_hx;
+      break;
+    case RISCV::BI__builtin_riscv_pasa_wx:
+      ID = Intrinsic::riscv_pasa_wx;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index f15b8627fdd63..7267b3ef8fb1a 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -434,6 +434,36 @@ static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_packt(int32_t __x, int32_t __y) {
   return __builtin_riscv_packt_32(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pas_hx(uint32_t __x, uint32_t __y) { // RV32 variant
+  return __builtin_riscv_pas_hx_32(__x, __y); // -> llvm.riscv.pas.hx.i32
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psa_hx(uint32_t __x, uint32_t __y) { // RV32 variant
+  return __builtin_riscv_psa_hx_32(__x, __y); // -> llvm.riscv.psa.hx.i32
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psas_hx(uint32_t __x, uint32_t __y) { // RV32 variant
+  return __builtin_riscv_psas_hx_32(__x, __y); // -> llvm.riscv.psas.hx.i32
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssa_hx(uint32_t __x, uint32_t __y) { // RV32 variant
+  return __builtin_riscv_pssa_hx_32(__x, __y); // -> llvm.riscv.pssa.hx.i32
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paas_hx(uint32_t __x, uint32_t __y) { // RV32 variant
+  return __builtin_riscv_paas_hx_32(__x, __y); // -> llvm.riscv.paas.hx.i32
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasa_hx(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pasa_hx_32(__x, __y);
+}
 #endif
 
 
@@ -1022,6 +1052,66 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_packt(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_packt_64(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pas_hx(uint64_t __x, uint64_t __y) { // RV64 variant
+  return __builtin_riscv_pas_hx_64(__x, __y); // -> llvm.riscv.pas.hx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pas_wx(uint64_t __x, uint64_t __y) { // RV64 only
+  return __builtin_riscv_pas_wx(__x, __y); // -> llvm.riscv.pas.wx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psa_hx(uint64_t __x, uint64_t __y) { // RV64 variant
+  return __builtin_riscv_psa_hx_64(__x, __y); // -> llvm.riscv.psa.hx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psa_wx(uint64_t __x, uint64_t __y) { // RV64 only
+  return __builtin_riscv_psa_wx(__x, __y); // -> llvm.riscv.psa.wx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psas_hx(uint64_t __x, uint64_t __y) { // RV64 variant
+  return __builtin_riscv_psas_hx_64(__x, __y); // -> llvm.riscv.psas.hx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psas_wx(uint64_t __x, uint64_t __y) { // RV64 only
+  return __builtin_riscv_psas_wx(__x, __y); // -> llvm.riscv.psas.wx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssa_hx(uint64_t __x, uint64_t __y) { // RV64 variant
+  return __builtin_riscv_pssa_hx_64(__x, __y); // -> llvm.riscv.pssa.hx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pssa_wx(uint64_t __x, uint64_t __y) { // RV64 only
+  return __builtin_riscv_pssa_wx(__x, __y); // -> llvm.riscv.pssa.wx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paas_hx(uint64_t __x, uint64_t __y) { // RV64 variant
+  return __builtin_riscv_paas_hx_64(__x, __y); // -> llvm.riscv.paas.hx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_paas_wx(uint64_t __x, uint64_t __y) { // RV64 only
+  return __builtin_riscv_paas_wx(__x, __y); // -> llvm.riscv.paas.wx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasa_hx(uint64_t __x, uint64_t __y) { // RV64 variant
+  return __builtin_riscv_pasa_hx_64(__x, __y); // -> llvm.riscv.pasa.hx.i64
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pasa_wx(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pasa_wx(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 8754fc8de50f9..630b2f4273986 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -751,3 +751,57 @@ uint32_t ppackt_h(uint32_t rs1, uint32_t rs2) {
 int32_t packt(int32_t rs1, int32_t rs2) {
   return __riscv_packt(rs1, rs2);
 }
+
+// RV32P-LABEL: @pas_hx(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pas.hx.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pas_hx(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.pas.hx.i32 call, then ret
+  return __riscv_pas_hx(rs1, rs2);
+}
+
+// RV32P-LABEL: @psa_hx(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psa.hx.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psa_hx(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.psa.hx.i32 call, then ret
+  return __riscv_psa_hx(rs1, rs2);
+}
+
+// RV32P-LABEL: @psas_hx(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.psas.hx.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t psas_hx(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.psas.hx.i32 call, then ret
+  return __riscv_psas_hx(rs1, rs2);
+}
+
+// RV32P-LABEL: @pssa_hx(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pssa.hx.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pssa_hx(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.pssa.hx.i32 call, then ret
+  return __riscv_pssa_hx(rs1, rs2);
+}
+
+// RV32P-LABEL: @paas_hx(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.paas.hx.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t paas_hx(uint32_t rs1, uint32_t rs2) { // RV32P CHECKs: single @llvm.riscv.paas.hx.i32 call, then ret
+  return __riscv_paas_hx(rs1, rs2);
+}
+
+// RV32P-LABEL: @pasa_hx(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pasa.hx.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pasa_hx(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pasa_hx(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 56bcdaabd1c3b..2cc1046629f9b 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -1074,3 +1074,111 @@ uint64_t ppackt_w(uint64_t rs1, uint64_t rs2) {
 uint64_t packt(uint64_t rs1, uint64_t rs2) {
   return __riscv_packt(rs1, rs2);
 }
+
+// RV64P-LABEL: @pas_hx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pas.hx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pas_hx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.pas.hx.i64 call, then ret
+  return __riscv_pas_hx(rs1, rs2);
+}
+
+// RV64P-LABEL: @pas_wx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pas.wx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pas_wx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.pas.wx.i64 call, then ret
+  return __riscv_pas_wx(rs1, rs2);
+}
+
+// RV64P-LABEL: @psa_hx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psa.hx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psa_hx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.psa.hx.i64 call, then ret
+  return __riscv_psa_hx(rs1, rs2);
+}
+
+// RV64P-LABEL: @psa_wx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psa.wx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psa_wx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.psa.wx.i64 call, then ret
+  return __riscv_psa_wx(rs1, rs2);
+}
+
+// RV64P-LABEL: @psas_hx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psas.hx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psas_hx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.psas.hx.i64 call, then ret
+  return __riscv_psas_hx(rs1, rs2);
+}
+
+// RV64P-LABEL: @psas_wx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.psas.wx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t psas_wx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.psas.wx.i64 call, then ret
+  return __riscv_psas_wx(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssa_hx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssa.hx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssa_hx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.pssa.hx.i64 call, then ret
+  return __riscv_pssa_hx(rs1, rs2);
+}
+
+// RV64P-LABEL: @pssa_wx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pssa.wx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pssa_wx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.pssa.wx.i64 call, then ret
+  return __riscv_pssa_wx(rs1, rs2);
+}
+
+// RV64P-LABEL: @paas_hx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.paas.hx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t paas_hx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.paas.hx.i64 call, then ret
+  return __riscv_paas_hx(rs1, rs2);
+}
+
+// RV64P-LABEL: @paas_wx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.paas.wx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t paas_wx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.paas.wx.i64 call, then ret
+  return __riscv_paas_wx(rs1, rs2);
+}
+
+// RV64P-LABEL: @pasa_hx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pasa.hx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pasa_hx(uint64_t rs1, uint64_t rs2) { // RV64P CHECKs: single @llvm.riscv.pasa.hx.i64 call, then ret
+  return __riscv_pasa_hx(rs1, rs2);
+}
+
+// RV64P-LABEL: @pasa_wx(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pasa.wx.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pasa_wx(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pasa_wx(rs1, rs2);
+}

>From 77038a5567de3f0ffe5b5b558075555032884d67 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 12:16:55 +0800
Subject: [PATCH 38/40] [RISCV] Packed Comparison and Min/Max C intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  38 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 124 ++++++++++++
 clang/lib/Headers/riscv_simd.h                | 190 ++++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 153 ++++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 189 +++++++++++++++++
 5 files changed, 694 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index fe184a2aae107..ffb3edbc7c6d6 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -230,6 +230,23 @@ def psas_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pssa_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def paas_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pasa_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mseq           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pmseq_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmseq_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mslt           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pmslt_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmslt_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def msltu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmsltu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmsltu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmin_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmin_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pminu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pminu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmax_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmax_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmaxu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmaxu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -362,6 +379,27 @@ def paas_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def paas_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pasa_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pasa_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmseq_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmseq_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmseq_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmslt_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmslt_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmslt_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmsltu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmsltu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmsltu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmin_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmin_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmin_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pminu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pminu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pminu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmax_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmax_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmax_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmaxu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmaxu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmaxu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 283f945a67c2c..6105b5b3c8e8c 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -361,6 +361,44 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pasa_hx_32:
   case RISCV::BI__builtin_riscv_pasa_hx_64:
   case RISCV::BI__builtin_riscv_pasa_wx:
+  case RISCV::BI__builtin_riscv_mseq:
+  case RISCV::BI__builtin_riscv_pmseq_b_32:
+  case RISCV::BI__builtin_riscv_pmseq_b_64:
+  case RISCV::BI__builtin_riscv_pmseq_h_32:
+  case RISCV::BI__builtin_riscv_pmseq_h_64:
+  case RISCV::BI__builtin_riscv_pmseq_w:
+  case RISCV::BI__builtin_riscv_mslt:
+  case RISCV::BI__builtin_riscv_pmslt_b_32:
+  case RISCV::BI__builtin_riscv_pmslt_b_64:
+  case RISCV::BI__builtin_riscv_pmslt_h_32:
+  case RISCV::BI__builtin_riscv_pmslt_h_64:
+  case RISCV::BI__builtin_riscv_pmslt_w:
+  case RISCV::BI__builtin_riscv_msltu:
+  case RISCV::BI__builtin_riscv_pmsltu_b_32:
+  case RISCV::BI__builtin_riscv_pmsltu_b_64:
+  case RISCV::BI__builtin_riscv_pmsltu_h_32:
+  case RISCV::BI__builtin_riscv_pmsltu_h_64:
+  case RISCV::BI__builtin_riscv_pmsltu_w:
+  case RISCV::BI__builtin_riscv_pmin_b_32:
+  case RISCV::BI__builtin_riscv_pmin_b_64:
+  case RISCV::BI__builtin_riscv_pmin_h_32:
+  case RISCV::BI__builtin_riscv_pmin_h_64:
+  case RISCV::BI__builtin_riscv_pmin_w:
+  case RISCV::BI__builtin_riscv_pminu_b_32:
+  case RISCV::BI__builtin_riscv_pminu_b_64:
+  case RISCV::BI__builtin_riscv_pminu_h_32:
+  case RISCV::BI__builtin_riscv_pminu_h_64:
+  case RISCV::BI__builtin_riscv_pminu_w:
+  case RISCV::BI__builtin_riscv_pmax_b_32:
+  case RISCV::BI__builtin_riscv_pmax_b_64:
+  case RISCV::BI__builtin_riscv_pmax_h_32:
+  case RISCV::BI__builtin_riscv_pmax_h_64:
+  case RISCV::BI__builtin_riscv_pmax_w:
+  case RISCV::BI__builtin_riscv_pmaxu_b_32:
+  case RISCV::BI__builtin_riscv_pmaxu_b_64:
+  case RISCV::BI__builtin_riscv_pmaxu_h_32:
+  case RISCV::BI__builtin_riscv_pmaxu_h_64:
+  case RISCV::BI__builtin_riscv_pmaxu_w:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -759,6 +797,92 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_pasa_wx:
       ID = Intrinsic::riscv_pasa_wx;
       break;
+    case RISCV::BI__builtin_riscv_mseq:
+      ID = Intrinsic::riscv_mseq;
+      break;
+    case RISCV::BI__builtin_riscv_pmseq_b_32:
+    case RISCV::BI__builtin_riscv_pmseq_b_64:
+      ID = Intrinsic::riscv_pmseq_b;
+      break;
+    case RISCV::BI__builtin_riscv_pmseq_h_32:
+    case RISCV::BI__builtin_riscv_pmseq_h_64:
+      ID = Intrinsic::riscv_pmseq_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmseq_w:
+      ID = Intrinsic::riscv_pmseq_w;
+      break;
+    case RISCV::BI__builtin_riscv_mslt:
+      ID = Intrinsic::riscv_mslt;
+      break;
+    case RISCV::BI__builtin_riscv_pmslt_b_32:
+    case RISCV::BI__builtin_riscv_pmslt_b_64:
+      ID = Intrinsic::riscv_pmslt_b;
+      break;
+    case RISCV::BI__builtin_riscv_pmslt_h_32:
+    case RISCV::BI__builtin_riscv_pmslt_h_64:
+      ID = Intrinsic::riscv_pmslt_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmslt_w:
+      ID = Intrinsic::riscv_pmslt_w;
+      break;
+    case RISCV::BI__builtin_riscv_msltu:
+      ID = Intrinsic::riscv_msltu;
+      break;
+    case RISCV::BI__builtin_riscv_pmsltu_b_32:
+    case RISCV::BI__builtin_riscv_pmsltu_b_64:
+      ID = Intrinsic::riscv_pmsltu_b;
+      break;
+    case RISCV::BI__builtin_riscv_pmsltu_h_32:
+    case RISCV::BI__builtin_riscv_pmsltu_h_64:
+      ID = Intrinsic::riscv_pmsltu_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmsltu_w:
+      ID = Intrinsic::riscv_pmsltu_w;
+      break;
+    case RISCV::BI__builtin_riscv_pmin_b_32:
+    case RISCV::BI__builtin_riscv_pmin_b_64:
+      ID = Intrinsic::riscv_pmin_b;
+      break;
+    case RISCV::BI__builtin_riscv_pmin_h_32:
+    case RISCV::BI__builtin_riscv_pmin_h_64:
+      ID = Intrinsic::riscv_pmin_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmin_w:
+      ID = Intrinsic::riscv_pmin_w;
+      break;
+    case RISCV::BI__builtin_riscv_pminu_b_32:
+    case RISCV::BI__builtin_riscv_pminu_b_64:
+      ID = Intrinsic::riscv_pminu_b;
+      break;
+    case RISCV::BI__builtin_riscv_pminu_h_32:
+    case RISCV::BI__builtin_riscv_pminu_h_64:
+      ID = Intrinsic::riscv_pminu_h;
+      break;
+    case RISCV::BI__builtin_riscv_pminu_w:
+      ID = Intrinsic::riscv_pminu_w;
+      break;
+    case RISCV::BI__builtin_riscv_pmax_b_32:
+    case RISCV::BI__builtin_riscv_pmax_b_64:
+      ID = Intrinsic::riscv_pmax_b;
+      break;
+    case RISCV::BI__builtin_riscv_pmax_h_32:
+    case RISCV::BI__builtin_riscv_pmax_h_64:
+      ID = Intrinsic::riscv_pmax_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmax_w:
+      ID = Intrinsic::riscv_pmax_w;
+      break;
+    case RISCV::BI__builtin_riscv_pmaxu_b_32:
+    case RISCV::BI__builtin_riscv_pmaxu_b_64:
+      ID = Intrinsic::riscv_pmaxu_b;
+      break;
+    case RISCV::BI__builtin_riscv_pmaxu_h_32:
+    case RISCV::BI__builtin_riscv_pmaxu_h_64:
+      ID = Intrinsic::riscv_pmaxu_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmaxu_w:
+      ID = Intrinsic::riscv_pmaxu_w;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 7267b3ef8fb1a..8eca76faa2cef 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -464,6 +464,91 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pasa_hx(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_pasa_hx_32(__x, __y);
 }
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mseq(int32_t __x, int32_t __y) {
+  return __builtin_riscv_mseq(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmseq_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmseq_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmseq_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmseq_h_32(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mslt(int32_t __x, int32_t __y) {
+  return __builtin_riscv_mslt(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmslt_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmslt_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmslt_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmslt_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_msltu(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_msltu(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmsltu_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmsltu_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmsltu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmsltu_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmin_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmin_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmin_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmin_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pminu_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pminu_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pminu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pminu_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmax_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmax_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmax_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmax_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmaxu_b(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmaxu_b_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmaxu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmaxu_h_32(__x, __y);
+}
 #endif
 
 
@@ -1112,6 +1197,111 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pasa_wx(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_pasa_wx(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmseq_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmseq_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmseq_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmseq_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmseq_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmseq_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmslt_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmslt_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmslt_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmslt_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmslt_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmslt_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmsltu_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmsltu_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmsltu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmsltu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmsltu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmsltu_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmin_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmin_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmin_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmin_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmin_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmin_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pminu_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pminu_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pminu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pminu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pminu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pminu_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmax_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmax_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmax_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmax_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmax_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmax_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmaxu_b(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmaxu_b_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmaxu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmaxu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmaxu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmaxu_w(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index 630b2f4273986..c773a426ed195 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -805,3 +805,156 @@ uint32_t paas_hx(uint32_t rs1, uint32_t rs2) {
 uint32_t pasa_hx(uint32_t rs1, uint32_t rs2) {
   return __riscv_pasa_hx(rs1, rs2);
 }
+
+// RV32P-LABEL: @mseq(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mseq.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t mseq(int32_t rs1, int32_t rs2) {
+  return __riscv_mseq(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmseq_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmseq.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmseq_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmseq_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmseq_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmseq.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmseq_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmseq_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @mslt(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mslt.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t mslt(int32_t rs1, int32_t rs2) {
+  return __riscv_mslt(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmslt_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmslt.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmslt_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmslt_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmslt_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmslt.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmslt_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmslt_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @msltu(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.msltu.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t msltu(uint32_t rs1, uint32_t rs2) {
+  return __riscv_msltu(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmsltu_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmsltu.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmsltu_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmsltu_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmsltu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmsltu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmsltu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmsltu_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmin_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmin.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmin_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmin_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmin_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmin.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmin_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmin_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pminu_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pminu.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pminu_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pminu_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pminu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pminu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pminu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pminu_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmax_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmax.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmax_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmax_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmax_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmax.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmax_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmax_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmaxu_b(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmaxu.b.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmaxu_b(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmaxu_b(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmaxu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmaxu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmaxu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmaxu_h(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index 2cc1046629f9b..e5b3badbf7833 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -1182,3 +1182,192 @@ uint64_t pasa_hx(uint64_t rs1, uint64_t rs2) {
 uint64_t pasa_wx(uint64_t rs1, uint64_t rs2) {
   return __riscv_pasa_wx(rs1, rs2);
 }
+
+// RV64P-LABEL: @pmseq_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmseq.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmseq_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmseq_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmseq_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmseq.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmseq_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmseq_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmseq_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmseq.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmseq_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmseq_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmslt_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmslt.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmslt_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmslt_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmslt_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmslt.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmslt_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmslt_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmslt_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmslt.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmslt_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmslt_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmsltu_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmsltu.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmsltu_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmsltu_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmsltu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmsltu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmsltu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmsltu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmsltu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmsltu.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmsltu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmsltu_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmin_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmin.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmin_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmin_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmin_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmin.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmin_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmin_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmin_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmin.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmin_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmin_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pminu_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pminu.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pminu_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pminu_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pminu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pminu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pminu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pminu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pminu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pminu.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pminu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pminu_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmax_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmax.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmax_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmax_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmax_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmax.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmax_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmax_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmax_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmax.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmax_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmax_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmaxu_b(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmaxu.b.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmaxu_b(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmaxu_b(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmaxu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmaxu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmaxu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmaxu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmaxu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmaxu.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmaxu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmaxu_w(rs1, rs2);
+}

>From 6200bda462a292ccbcc664531bb02123c894bb0c Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 12:21:25 +0800
Subject: [PATCH 39/40] [RISCV] Packed High-half Multiply and Accumulate C
 intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  37 ++++
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp    | 128 ++++++++++++
 clang/lib/Headers/riscv_simd.h                | 185 ++++++++++++++++++
 .../RISCV/rvp-intrinsics/riscv32-simd.c       | 153 +++++++++++++++
 .../RISCV/rvp-intrinsics/riscv64-simd.c       | 180 +++++++++++++++++
 5 files changed, 683 insertions(+)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index ffb3edbc7c6d6..966fa8531e6bc 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -247,6 +247,23 @@ def pmax_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pmax_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pmaxu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
 def pmaxu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulh_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulh_h_b0_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulh_h_b1_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhr_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhru_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhsu_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhsu_h_b0_32: RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhsu_h_b1_32: RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhrsu_h_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulh_h1        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulhr          : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def mulhru         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulh_h0        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulhsu_h0      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulhsu_h1      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulhrsu        : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
@@ -400,6 +417,26 @@ def pmax_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pmaxu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pmaxu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 def pmaxu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_h_b0_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_w_h0     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_h_b1_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_w_h1     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhr_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhr_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhru_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhru_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_h_b0_64: RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_w_h0   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_h_b1_64: RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_w_h1   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhrsu_h_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhrsu_w     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 6105b5b3c8e8c..42edf1ddafa72 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -399,6 +399,22 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pmaxu_h_32:
   case RISCV::BI__builtin_riscv_pmaxu_h_64:
   case RISCV::BI__builtin_riscv_pmaxu_w:
+  case RISCV::BI__builtin_riscv_pmulh_h_32:
+  case RISCV::BI__builtin_riscv_pmulh_h_64:
+  case RISCV::BI__builtin_riscv_pmulh_w:
+  case RISCV::BI__builtin_riscv_pmulhu_h_32:
+  case RISCV::BI__builtin_riscv_pmulhu_h_64:
+  case RISCV::BI__builtin_riscv_pmulhu_w:
+  case RISCV::BI__builtin_riscv_pmulhr_h_32:
+  case RISCV::BI__builtin_riscv_pmulhr_h_64:
+  case RISCV::BI__builtin_riscv_pmulhr_w:
+  case RISCV::BI__builtin_riscv_pmulhru_h_32:
+  case RISCV::BI__builtin_riscv_pmulhru_h_64:
+  case RISCV::BI__builtin_riscv_pmulhru_w:
+  case RISCV::BI__builtin_riscv_mulh_h1:
+  case RISCV::BI__builtin_riscv_mulhr:
+  case RISCV::BI__builtin_riscv_mulhru:
+  case RISCV::BI__builtin_riscv_mulh_h0:
   case RISCV::BI__builtin_riscv_sadd: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -883,6 +899,46 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_pmaxu_w:
       ID = Intrinsic::riscv_pmaxu_w;
       break;
+    case RISCV::BI__builtin_riscv_pmulh_h_32:
+    case RISCV::BI__builtin_riscv_pmulh_h_64:
+      ID = Intrinsic::riscv_pmulh_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmulh_w:
+      ID = Intrinsic::riscv_pmulh_w;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhu_h_32:
+    case RISCV::BI__builtin_riscv_pmulhu_h_64:
+      ID = Intrinsic::riscv_pmulhu_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhu_w:
+      ID = Intrinsic::riscv_pmulhu_w;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhr_h_32:
+    case RISCV::BI__builtin_riscv_pmulhr_h_64:
+      ID = Intrinsic::riscv_pmulhr_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhr_w:
+      ID = Intrinsic::riscv_pmulhr_w;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhru_h_32:
+    case RISCV::BI__builtin_riscv_pmulhru_h_64:
+      ID = Intrinsic::riscv_pmulhru_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhru_w:
+      ID = Intrinsic::riscv_pmulhru_w;
+      break;
+    case RISCV::BI__builtin_riscv_mulh_h1:
+      ID = Intrinsic::riscv_mulh_h1;
+      break;
+    case RISCV::BI__builtin_riscv_mulhr:
+      ID = Intrinsic::riscv_mulhr;
+      break;
+    case RISCV::BI__builtin_riscv_mulhru:
+      ID = Intrinsic::riscv_mulhru;
+      break;
+    case RISCV::BI__builtin_riscv_mulh_h0:
+      ID = Intrinsic::riscv_mulh_h0;
+      break;
     case RISCV::BI__builtin_riscv_sadd:
       ID = Intrinsic::riscv_sadd;
       break;
@@ -920,6 +976,12 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_psati_w:
   case RISCV::BI__builtin_riscv_sati_32:
   case RISCV::BI__builtin_riscv_sati_64:
+  case RISCV::BI__builtin_riscv_pmulhrsu_h_32:
+  case RISCV::BI__builtin_riscv_pmulhrsu_h_64:
+  case RISCV::BI__builtin_riscv_pmulhrsu_w:
+  case RISCV::BI__builtin_riscv_mulhsu_h0:
+  case RISCV::BI__builtin_riscv_mulhsu_h1:
+  case RISCV::BI__builtin_riscv_mulhrsu:
   case RISCV::BI__builtin_riscv_sslai: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -988,6 +1050,22 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_sati_64:
       ID = Intrinsic::riscv_sati;
       break;
+    case RISCV::BI__builtin_riscv_pmulhrsu_h_32:
+    case RISCV::BI__builtin_riscv_pmulhrsu_h_64:
+      ID = Intrinsic::riscv_pmulhrsu_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhrsu_w:
+      ID = Intrinsic::riscv_pmulhrsu_w;
+      break;
+    case RISCV::BI__builtin_riscv_mulhsu_h0:
+      ID = Intrinsic::riscv_mulhsu_h0;
+      break;
+    case RISCV::BI__builtin_riscv_mulhsu_h1:
+      ID = Intrinsic::riscv_mulhsu_h1;
+      break;
+    case RISCV::BI__builtin_riscv_mulhrsu:
+      ID = Intrinsic::riscv_mulhrsu;
+      break;
     }
     IntrinsicTypes = {ResultType, Ops[1]->getType()};
     break;
@@ -1019,6 +1097,12 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_mulu_w00:
   case RISCV::BI__builtin_riscv_mulu_h11:
   case RISCV::BI__builtin_riscv_mulu_w11:
+  case RISCV::BI__builtin_riscv_pmulh_h_b0_32:
+  case RISCV::BI__builtin_riscv_pmulh_h_b0_64:
+  case RISCV::BI__builtin_riscv_pmulh_w_h0:
+  case RISCV::BI__builtin_riscv_pmulh_h_b1_32:
+  case RISCV::BI__builtin_riscv_pmulh_h_b1_64:
+  case RISCV::BI__builtin_riscv_pmulh_w_h1:
   case RISCV::BI__builtin_riscv_pmulu_w_h01: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -1088,6 +1172,20 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_mulu_w11:
       ID = Intrinsic::riscv_mulu_w11;
       break;
+    case RISCV::BI__builtin_riscv_pmulh_h_b0_32:
+    case RISCV::BI__builtin_riscv_pmulh_h_b0_64:
+      ID = Intrinsic::riscv_pmulh_h_b0;
+      break;
+    case RISCV::BI__builtin_riscv_pmulh_w_h0:
+      ID = Intrinsic::riscv_pmulh_w_h0;
+      break;
+    case RISCV::BI__builtin_riscv_pmulh_h_b1_32:
+    case RISCV::BI__builtin_riscv_pmulh_h_b1_64:
+      ID = Intrinsic::riscv_pmulh_h_b1;
+      break;
+    case RISCV::BI__builtin_riscv_pmulh_w_h1:
+      ID = Intrinsic::riscv_pmulh_w_h1;
+      break;
     }
     IntrinsicTypes = {ResultType, Ops[0]->getType()};
     break;
@@ -1102,6 +1200,15 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_mulsu_h00:
   case RISCV::BI__builtin_riscv_mulsu_w00:
   case RISCV::BI__builtin_riscv_mulsu_h11:
+  case RISCV::BI__builtin_riscv_pmulhsu_h_32:
+  case RISCV::BI__builtin_riscv_pmulhsu_h_64:
+  case RISCV::BI__builtin_riscv_pmulhsu_w:
+  case RISCV::BI__builtin_riscv_pmulhsu_h_b0_32:
+  case RISCV::BI__builtin_riscv_pmulhsu_h_b0_64:
+  case RISCV::BI__builtin_riscv_pmulhsu_w_h0:
+  case RISCV::BI__builtin_riscv_pmulhsu_h_b1_32:
+  case RISCV::BI__builtin_riscv_pmulhsu_h_b1_64:
+  case RISCV::BI__builtin_riscv_pmulhsu_w_h1:
   case RISCV::BI__builtin_riscv_mulsu_w11: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
@@ -1131,6 +1238,27 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     case RISCV::BI__builtin_riscv_mulsu_w11:
       ID = Intrinsic::riscv_mulsu_w11;
       break;
+    case RISCV::BI__builtin_riscv_pmulhsu_h_32:
+    case RISCV::BI__builtin_riscv_pmulhsu_h_64:
+      ID = Intrinsic::riscv_pmulhsu_h;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhsu_w:
+      ID = Intrinsic::riscv_pmulhsu_w;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhsu_h_b0_32:
+    case RISCV::BI__builtin_riscv_pmulhsu_h_b0_64:
+      ID = Intrinsic::riscv_pmulhsu_h_b0;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhsu_w_h0:
+      ID = Intrinsic::riscv_pmulhsu_w_h0;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhsu_h_b1_32:
+    case RISCV::BI__builtin_riscv_pmulhsu_h_b1_64:
+      ID = Intrinsic::riscv_pmulhsu_h_b1;
+      break;
+    case RISCV::BI__builtin_riscv_pmulhsu_w_h1:
+      ID = Intrinsic::riscv_pmulhsu_w_h1;
+      break;
     }
     IntrinsicTypes = {ResultType, Ops[0]->getType(), Ops[1]->getType()};
     break;
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 8eca76faa2cef..d0a43d337bf52 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -549,6 +549,91 @@ static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pmaxu_h(uint32_t __x, uint32_t __y) {
   return __builtin_riscv_pmaxu_h_32(__x, __y);
 }
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulh_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_h_b0(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulh_h_b0_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_h_b1(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulh_h_b1_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulhu_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhr_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulhr_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhru_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulhru_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulhsu_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_h_b0(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulhsu_h_b0_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_h_b1(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulhsu_h_b1_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhrsu_h(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_pmulhrsu_h_32(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulh_h1(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulh_h1(__x, __y);
+}
+
+static __inline__ int32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulhr(int32_t __x, int32_t __y) {
+  return __builtin_riscv_mulhr(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulhru(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulhru(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulh_h0(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulh_h0(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulhsu_h0(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulhsu_h0(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulhsu_h1(uint32_t __x, uint32_t __y) {
+  return __builtin_riscv_mulhsu_h1(__x, __y);
+}
+
+static __inline__ uint32_t __attribute__((__always_inline__, __nodebug__))
+__riscv_mulhrsu(uint32_t __x, int32_t __y) {
+  return __builtin_riscv_mulhrsu(__x, __y);
+}
 #endif
 
 
@@ -1302,6 +1387,106 @@ static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
 __riscv_pmaxu_w(uint64_t __x, uint64_t __y) {
   return __builtin_riscv_pmaxu_w(__x, __y);
 }
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulh_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulh_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_h_b0(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulh_h_b0_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_w_h0(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulh_w_h0(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_h_b1(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulh_h_b1_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulh_w_h1(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulh_w_h1(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhu_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhr_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhr_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhr_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhr_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhru_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhru_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhru_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhru_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhsu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhsu_w(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_h_b0(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhsu_h_b0_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_w_h0(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhsu_w_h0(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_h_b1(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhsu_h_b1_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhsu_w_h1(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhsu_w_h1(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhrsu_h(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhrsu_h_64(__x, __y);
+}
+
+static __inline__ uint64_t __attribute__((__always_inline__, __nodebug__))
+__riscv_pmulhrsu_w(uint64_t __x, uint64_t __y) {
+  return __builtin_riscv_pmulhrsu_w(__x, __y);
+}
 #endif
 
 #endif // defined(__riscv_p)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
index c773a426ed195..f49acd8cffa18 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv32-simd.c
@@ -958,3 +958,156 @@ uint32_t pmaxu_b(uint32_t rs1, uint32_t rs2) {
 uint32_t pmaxu_h(uint32_t rs1, uint32_t rs2) {
   return __riscv_pmaxu_h(rs1, rs2);
 }
+
+// RV32P-LABEL: @pmulh_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulh.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulh_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulh_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulh_h_b0(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulh.h.b0.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulh_h_b0(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulh_h_b0(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulh_h_b1(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulh.h.b1.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulh_h_b1(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulh_h_b1(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulhu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulhu.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulhu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulhu_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulhr_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulhr.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulhr_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulhr_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulhru_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulhru.h.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulhru_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulhru_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulhsu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulhsu.h.i32.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulhsu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulhsu_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulhsu_h_b0(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulhsu.h.b0.i32.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulhsu_h_b0(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulhsu_h_b0(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulhsu_h_b1(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulhsu.h.b1.i32.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulhsu_h_b1(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulhsu_h_b1(rs1, rs2);
+}
+
+// RV32P-LABEL: @pmulhrsu_h(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.pmulhrsu.h.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t pmulhrsu_h(uint32_t rs1, uint32_t rs2) {
+  return __riscv_pmulhrsu_h(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulh_h1(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulh.h1.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulh_h1(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulh_h1(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulhr(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulhr.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+int32_t mulhr(int32_t rs1, int32_t rs2) {
+  return __riscv_mulhr(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulhru(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulhru.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulhru(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulhru(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulh_h0(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulh.h0.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulh_h0(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulh_h0(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulhsu_h0(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulhsu.h0.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulhsu_h0(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulhsu_h0(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulhsu_h1(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulhsu.h1.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulhsu_h1(uint32_t rs1, uint32_t rs2) {
+  return __riscv_mulhsu_h1(rs1, rs2);
+}
+
+// RV32P-LABEL: @mulhrsu(
+// RV32P-NEXT:  entry:
+// RV32P-NEXT:    [[TMP0:%.*]] = call i32 @llvm.riscv.mulhrsu.i32.i32(i32 [[RS1:%.*]], i32 [[RS2:%.*]])
+// RV32P-NEXT:    ret i32 [[TMP0]]
+//
+uint32_t mulhrsu(uint32_t rs1, int32_t rs2) {
+  return __riscv_mulhrsu(rs1, rs2);
+}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
index e5b3badbf7833..a14ce09da0029 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics/riscv64-simd.c
@@ -1371,3 +1371,183 @@ uint64_t pmaxu_h(uint64_t rs1, uint64_t rs2) {
 uint64_t pmaxu_w(uint64_t rs1, uint64_t rs2) {
   return __riscv_pmaxu_w(rs1, rs2);
 }
+
+// RV64P-LABEL: @pmulh_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulh.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulh_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulh_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulh_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulh.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulh_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulh_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulh_h_b0(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulh.h.b0.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulh_h_b0(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulh_h_b0(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulh_w_h0(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulh.w.h0.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulh_w_h0(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulh_w_h0(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulh_h_b1(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulh.h.b1.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulh_h_b1(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulh_h_b1(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulh_w_h1(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulh.w.h1.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulh_w_h1(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulh_w_h1(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhu.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhu.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhu_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhr_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhr.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhr_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhr_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhr_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhr.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhr_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhr_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhru_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhru.h.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhru_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhru_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhru_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhru.w.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhru_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhru_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhsu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhsu.h.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhsu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhsu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhsu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhsu.w.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhsu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhsu_w(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhsu_h_b0(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhsu.h.b0.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhsu_h_b0(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhsu_h_b0(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhsu_w_h0(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhsu.w.h0.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhsu_w_h0(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhsu_w_h0(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhsu_h_b1(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhsu.h.b1.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhsu_h_b1(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhsu_h_b1(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhsu_w_h1(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhsu.w.h1.i64.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhsu_w_h1(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhsu_w_h1(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhrsu_h(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhrsu.h.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhrsu_h(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhrsu_h(rs1, rs2);
+}
+
+// RV64P-LABEL: @pmulhrsu_w(
+// RV64P-NEXT:  entry:
+// RV64P-NEXT:    [[TMP0:%.*]] = call i64 @llvm.riscv.pmulhrsu.w.i64.i64(i64 [[RS1:%.*]], i64 [[RS2:%.*]])
+// RV64P-NEXT:    ret i64 [[TMP0]]
+//
+uint64_t pmulhrsu_w(uint64_t rs1, uint64_t rs2) {
+  return __riscv_pmulhrsu_w(rs1, rs2);
+}

>From 6b8627f0a5cc79fe670af18afb5e39bbc328c481 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 5 Sep 2025 12:55:06 +0800
Subject: [PATCH 40/40] Align spacing and reorder some intrinsics

---
 clang/include/clang/Basic/BuiltinsRISCV.td |  586 +++++------
 clang/lib/CodeGen/TargetBuiltins/RISCV.cpp | 1089 ++++++++++----------
 llvm/include/llvm/IR/IntrinsicsRISCV.td    |   28 +-
 3 files changed, 858 insertions(+), 845 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 966fa8531e6bc..370ead247a494 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -141,302 +141,302 @@ def sm3p1 : RISCVBuiltin<"unsigned int(unsigned int)">;
 // Packed SIMD extension.
 //===----------------------------------------------------------------------===//
 let Features = "experimental-p,32bit" in {
-def pslli_b_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def pslli_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def psslai_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def sslai       : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def psll_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psll_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def padd_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def padd_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pusati_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def usati_32    : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def psrai_b_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def psrai_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def psrari_h_32 : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def srari_32    : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def psati_h_32  : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
-def sati_32     : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def psrl_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psrl_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def predsum_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def predsum_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def predsumu_bs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def predsumu_hs_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psra_bs_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psra_hs_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def padd_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def padd_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def sadd           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def psadd_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psadd_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def aadd           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def paadd_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def paadd_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def saddu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psaddu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psaddu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def aaddu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def paaddu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def paaddu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psub_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psub_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def ssub           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def pssub_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pssub_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def asub           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def pasub_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pasub_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def ssubu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pssubu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pssubu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def asubu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pasubu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pasubu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pdif_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pdif_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pdifu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pdifu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmul_h_b01_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulu_h_b01_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mul_h01        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulu_h01       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def slx_32         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psh1add_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def ssh1sadd       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def pssh1sadd_h_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmul_h_b00_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmul_h_b11_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulu_h_b00_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulu_h_b11_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulsu_h_b00_32: RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulsu_h_b11_32: RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mul_h00        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mul_h11        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulu_h00       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulu_h11       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulsu_h00      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulsu_h11      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def ppack_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def ppackbt_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def packbt_32      : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def ppacktb_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def packtb_32      : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def ppackt_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def packt_32       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def pas_hx_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psa_hx_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def psas_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pssa_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def paas_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pasa_hx_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mseq           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def pmseq_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmseq_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mslt           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def pmslt_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmslt_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def msltu          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmsltu_b_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmsltu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmin_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmin_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pminu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pminu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmax_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmax_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmaxu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmaxu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulh_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulh_h_b0_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulh_h_b1_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulhu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulhr_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulhru_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulhsu_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulhsu_h_b0_32: RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulhsu_h_b1_32: RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def pmulhrsu_h_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulh_h1        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulhr          : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
-def mulhru         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulh_h0        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulhsu_h0      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulhsu_h1      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
-def mulhrsu        : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def pslli_b_32      : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def pslli_h_32      : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psslai_h_32     : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def sslai           : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psll_bs_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psll_hs_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def padd_bs_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def padd_hs_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pusati_h_32     : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def usati_32        : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psrai_b_32      : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psrai_h_32      : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def psrari_h_32     : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def srari_32        : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def psati_h_32      : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
+def sati_32         : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def psrl_bs_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psrl_hs_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def predsum_bs_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def predsum_hs_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def predsumu_bs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def predsumu_hs_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psra_bs_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psra_hs_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def padd_b_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def padd_h_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def sadd            : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def psadd_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psadd_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def aadd            : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def paadd_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def paadd_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def saddu           : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psaddu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psaddu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def aaddu           : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def paaddu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def paaddu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psub_b_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psub_h_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ssub            : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pssub_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pssub_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def asub            : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pasub_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pasub_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ssubu           : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pssubu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pssubu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def asubu           : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pasubu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pasubu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pdif_b_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pdif_h_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pdifu_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pdifu_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmul_h_b01_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulu_h_b01_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mul_h01         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulu_h01        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def slx_32          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psh1add_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ssh1sadd        : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pssh1sadd_h_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmul_h_b00_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmul_h_b11_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulu_h_b00_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulu_h_b11_32  : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulsu_h_b00_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulsu_h_b11_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mul_h00         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mul_h11         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulu_h00        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulu_h11        : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulsu_h00       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulsu_h11       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ppack_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def ppackbt_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def packbt_32       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def ppacktb_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def packtb_32       : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def ppackt_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def packt_32        : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pas_hx_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psa_hx_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def psas_hx_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pssa_hx_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def paas_hx_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pasa_hx_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mseq            : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pmseq_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmseq_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mslt            : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def pmslt_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmslt_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def msltu           : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmsltu_b_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmsltu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmin_b_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmin_h_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pminu_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pminu_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmax_b_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmax_h_32       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmaxu_b_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmaxu_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulh_h_32      : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulh_h_b0_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulh_h_b1_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhu_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhr_h_32     : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhru_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhsu_h_32    : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhsu_h_b0_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhsu_h_b1_32 : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def pmulhrsu_h_32   : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulh_h1         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulhr           : RISCVBuiltin<"int32_t(int32_t, int32_t)">;
+def mulhru          : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulh_h0         : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulhsu_h0       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulhsu_h1       : RISCVBuiltin<"uint32_t(uint32_t, uint32_t)">;
+def mulhrsu         : RISCVBuiltin<"uint32_t(uint32_t, int32_t)">;
 } // Features = "experimental-p,32bit"
 
 let Features = "experimental-p,64bit" in {
-def pslli_b_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def pslli_h_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def pslli_w     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psslai_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psslai_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psll_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psll_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psll_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def padd_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def padd_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def padd_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pusati_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def pusati_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def usati_64    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psrai_b_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psrai_h_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psrai_w     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psrari_h_64 : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psrari_w    : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def srari_64    : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def psati_h_64  : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def psati_w     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
-def sati_64     : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def psrl_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psrl_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psrl_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def predsum_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def predsum_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def predsum_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def predsumu_bs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def predsumu_hs_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def predsumu_ws    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psra_bs_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psra_hs_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psra_ws        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def padd_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def padd_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def padd_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psadd_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psadd_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psadd_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def paadd_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def paadd_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def paadd_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psaddu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psaddu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psaddu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def paaddu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def paaddu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def paaddu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psub_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psub_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psub_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssub_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssub_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssub_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pasub_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pasub_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pasub_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssubu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssubu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssubu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pasubu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pasubu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pasubu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pdif_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pdif_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pdifu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pdifu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmul_h_b01_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmul_w_h01     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulu_h_b01_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulu_w_h01    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def mul_w01        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def mulu_w01       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def slx_64         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psh1add_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psh1add_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssh1sadd_h_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssh1sadd_w    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def unzip8p        : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def unzip16p       : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def unzip8hp       : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def unzip16hp      : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def zip8p          : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def zip16p         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def zip8hp         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def zip16hp        : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
-def pmul_h_b00_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmul_w_h00     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmul_h_b11_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmul_w_h11     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulu_h_b00_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulu_w_h00    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulu_h_b11_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulu_w_h11    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulsu_h_b00_64: RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulsu_w_h00   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulsu_h_b11_64: RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulsu_w_h11   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def mul_w00        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def mul_w11        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def mulu_w00       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def mulu_w11       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def mulsu_w00      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def mulsu_w11      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def ppack_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def ppack_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def ppackbt_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def ppackbt_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def packbt_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def ppacktb_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def ppacktb_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def packtb_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def ppackt_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def ppackt_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def packt_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pas_hx_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pas_wx         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psa_hx_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psa_wx         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psas_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def psas_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssa_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pssa_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def paas_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def paas_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pasa_hx_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pasa_wx        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmseq_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmseq_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmseq_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmslt_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmslt_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmslt_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmsltu_b_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmsltu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmsltu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmin_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmin_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmin_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pminu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pminu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pminu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmax_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmax_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmax_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmaxu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmaxu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmaxu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulh_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulh_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulh_h_b0_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulh_w_h0     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulh_h_b1_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulh_w_h1     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhr_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhr_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhru_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhru_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhsu_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhsu_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhsu_h_b0_64: RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhsu_w_h0   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhsu_h_b1_64: RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhsu_w_h1   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhrsu_h_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
-def pmulhrsu_w     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pslli_b_64      : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pslli_h_64      : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pslli_w         : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psslai_h_64     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psslai_w        : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psll_bs_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psll_hs_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psll_ws         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_bs_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_hs_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_ws         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pusati_h_64     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def pusati_w        : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def usati_64        : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrai_b_64      : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrai_h_64      : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrai_w         : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrari_h_64     : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psrari_w        : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def srari_64        : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def psati_h_64      : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def psati_w         : RISCVBuiltin<"uint64_t(uint64_t, int64_t)">;
+def sati_64         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def psrl_bs_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psrl_hs_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psrl_ws         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsum_bs_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsum_hs_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsum_ws      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsumu_bs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsumu_hs_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def predsumu_ws     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psra_bs_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psra_hs_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psra_ws         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_b_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_h_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def padd_w          : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psadd_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psadd_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psadd_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paadd_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paadd_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paadd_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psaddu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psaddu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psaddu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paaddu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paaddu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paaddu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psub_b_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psub_h_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psub_w          : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssub_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssub_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssub_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasub_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasub_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasub_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssubu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssubu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssubu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasubu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasubu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasubu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pdif_b_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pdif_h_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pdifu_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pdifu_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_h_b01_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_w_h01      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_h_b01_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_w_h01     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mul_w01         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulu_w01        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def slx_64          : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psh1add_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psh1add_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssh1sadd_h_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssh1sadd_w     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def unzip8p         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def unzip16p        : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def unzip8hp        : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def unzip16hp       : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def zip8p           : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def zip16p          : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def zip8hp          : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def zip16hp         : RISCVBuiltin<"int64_t(int64_t, int64_t)">;
+def pmul_h_b00_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_w_h00      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_h_b11_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmul_w_h11      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_h_b00_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_w_h00     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_h_b11_64  : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulu_w_h11     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulsu_h_b00_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulsu_w_h00    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulsu_h_b11_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulsu_w_h11    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mul_w00         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mul_w11         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulu_w00        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulu_w11        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulsu_w00       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def mulsu_w11       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppack_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppack_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppackbt_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppackbt_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def packbt_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppacktb_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppacktb_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def packtb_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppackt_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def ppackt_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def packt_64        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pas_hx_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pas_wx          : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psa_hx_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psa_wx          : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psas_hx_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def psas_wx         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssa_hx_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pssa_wx         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paas_hx_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def paas_wx         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasa_hx_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pasa_wx         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmseq_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmseq_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmseq_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmslt_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmslt_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmslt_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmsltu_b_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmsltu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmsltu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmin_b_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmin_h_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmin_w          : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pminu_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pminu_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pminu_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmax_b_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmax_h_64       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmax_w          : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmaxu_b_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmaxu_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmaxu_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_h_64      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_w         : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_h_b0_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_w_h0      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_h_b1_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulh_w_h1      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhu_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhu_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhr_h_64     : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhr_w        : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhru_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhru_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_h_64    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_w       : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_h_b0_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_w_h0    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_h_b1_64 : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhsu_w_h1    : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhrsu_h_64   : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
+def pmulhrsu_w      : RISCVBuiltin<"uint64_t(uint64_t, uint64_t)">;
 } // Features = "experimental-p,64bit"
 } // Attributes = [Const, NoThrow]
 
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 42edf1ddafa72..2cf9fccecacb5 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -208,7 +208,59 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_brev8_32:
   case RISCV::BI__builtin_riscv_brev8_64:
   case RISCV::BI__builtin_riscv_zip_32:
-  case RISCV::BI__builtin_riscv_unzip_32:
+  case RISCV::BI__builtin_riscv_unzip_32: {
+    switch (BuiltinID) {
+    default: llvm_unreachable("unexpected builtin ID");
+    // Zbb
+    case RISCV::BI__builtin_riscv_orc_b_32:
+    case RISCV::BI__builtin_riscv_orc_b_64:
+      ID = Intrinsic::riscv_orc_b;
+      break;
+
+    // Zbc
+    case RISCV::BI__builtin_riscv_clmul_32:
+    case RISCV::BI__builtin_riscv_clmul_64:
+      ID = Intrinsic::riscv_clmul;
+      break;
+    case RISCV::BI__builtin_riscv_clmulh_32:
+    case RISCV::BI__builtin_riscv_clmulh_64:
+      ID = Intrinsic::riscv_clmulh;
+      break;
+    case RISCV::BI__builtin_riscv_clmulr_32:
+    case RISCV::BI__builtin_riscv_clmulr_64:
+      ID = Intrinsic::riscv_clmulr;
+      break;
+
+    // Zbkx
+    case RISCV::BI__builtin_riscv_xperm8_32:
+    case RISCV::BI__builtin_riscv_xperm8_64:
+      ID = Intrinsic::riscv_xperm8;
+      break;
+    case RISCV::BI__builtin_riscv_xperm4_32:
+    case RISCV::BI__builtin_riscv_xperm4_64:
+      ID = Intrinsic::riscv_xperm4;
+      break;
+
+    // Zbkb
+    case RISCV::BI__builtin_riscv_brev8_32:
+    case RISCV::BI__builtin_riscv_brev8_64:
+      ID = Intrinsic::riscv_brev8;
+      break;
+    case RISCV::BI__builtin_riscv_zip_32:
+      ID = Intrinsic::riscv_zip;
+      break;
+    case RISCV::BI__builtin_riscv_unzip_32:
+      ID = Intrinsic::riscv_unzip;
+      break;
+    }
+
+    IntrinsicTypes = {ResultType};
+    break;
+  }
+
+  // RISCV Packed SIMD extension
+
+  // Intrinsic type is obtained from Ops[0].
   case RISCV::BI__builtin_riscv_psll_bs_32:
   case RISCV::BI__builtin_riscv_psll_bs_64:
   case RISCV::BI__builtin_riscv_psll_hs_32:
@@ -244,6 +296,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_padd_h_32:
   case RISCV::BI__builtin_riscv_padd_h_64:
   case RISCV::BI__builtin_riscv_padd_w:
+  case RISCV::BI__builtin_riscv_sadd:
   case RISCV::BI__builtin_riscv_psadd_b_32:
   case RISCV::BI__builtin_riscv_psadd_b_64:
   case RISCV::BI__builtin_riscv_psadd_h_32:
@@ -414,540 +467,497 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_mulh_h1:
   case RISCV::BI__builtin_riscv_mulhr:
   case RISCV::BI__builtin_riscv_mulhru:
-  case RISCV::BI__builtin_riscv_mulh_h0:
-  case RISCV::BI__builtin_riscv_sadd: {
-    switch (BuiltinID) {
-    default: llvm_unreachable("unexpected builtin ID");
-    // Zbb
-    case RISCV::BI__builtin_riscv_orc_b_32:
-    case RISCV::BI__builtin_riscv_orc_b_64:
-      ID = Intrinsic::riscv_orc_b;
-      break;
-
-    // Zbc
-    case RISCV::BI__builtin_riscv_clmul_32:
-    case RISCV::BI__builtin_riscv_clmul_64:
-      ID = Intrinsic::riscv_clmul;
-      break;
-    case RISCV::BI__builtin_riscv_clmulh_32:
-    case RISCV::BI__builtin_riscv_clmulh_64:
-      ID = Intrinsic::riscv_clmulh;
-      break;
-    case RISCV::BI__builtin_riscv_clmulr_32:
-    case RISCV::BI__builtin_riscv_clmulr_64:
-      ID = Intrinsic::riscv_clmulr;
-      break;
-
-    // Zbkx
-    case RISCV::BI__builtin_riscv_xperm8_32:
-    case RISCV::BI__builtin_riscv_xperm8_64:
-      ID = Intrinsic::riscv_xperm8;
-      break;
-    case RISCV::BI__builtin_riscv_xperm4_32:
-    case RISCV::BI__builtin_riscv_xperm4_64:
-      ID = Intrinsic::riscv_xperm4;
-      break;
-
-    // Zbkb
-    case RISCV::BI__builtin_riscv_brev8_32:
-    case RISCV::BI__builtin_riscv_brev8_64:
-      ID = Intrinsic::riscv_brev8;
-      break;
-    case RISCV::BI__builtin_riscv_zip_32:
-      ID = Intrinsic::riscv_zip;
-      break;
-    case RISCV::BI__builtin_riscv_unzip_32:
-      ID = Intrinsic::riscv_unzip;
-      break;
-
-    // Packed SIMD
-    case RISCV::BI__builtin_riscv_psll_bs_32:
-    case RISCV::BI__builtin_riscv_psll_bs_64:
-      ID = Intrinsic::riscv_psll_bs;
-      break;
-    case RISCV::BI__builtin_riscv_psll_hs_32:
-    case RISCV::BI__builtin_riscv_psll_hs_64:
-      ID = Intrinsic::riscv_psll_hs;
-      break;
-    case RISCV::BI__builtin_riscv_psll_ws:
-      ID = Intrinsic::riscv_psll_ws;
-      break;
-    case RISCV::BI__builtin_riscv_padd_bs_32:
-    case RISCV::BI__builtin_riscv_padd_bs_64:
-      ID = Intrinsic::riscv_padd_bs;
-      break;
-    case RISCV::BI__builtin_riscv_padd_hs_32:
-    case RISCV::BI__builtin_riscv_padd_hs_64:
-      ID = Intrinsic::riscv_padd_hs;
-      break;
-    case RISCV::BI__builtin_riscv_padd_ws:
-      ID = Intrinsic::riscv_padd_ws;
-      break;
-    case RISCV::BI__builtin_riscv_psrl_bs_32:
-    case RISCV::BI__builtin_riscv_psrl_bs_64:
-      ID = Intrinsic::riscv_psrl_bs;
-      break;
-    case RISCV::BI__builtin_riscv_psrl_hs_32:
-    case RISCV::BI__builtin_riscv_psrl_hs_64:
-      ID = Intrinsic::riscv_psrl_hs;
-      break;
-    case RISCV::BI__builtin_riscv_psrl_ws:
-      ID = Intrinsic::riscv_psrl_ws;
-      break;
-    case RISCV::BI__builtin_riscv_predsum_bs_32:
-    case RISCV::BI__builtin_riscv_predsum_bs_64:
-      ID = Intrinsic::riscv_predsum_bs;
-      break;
-    case RISCV::BI__builtin_riscv_predsum_hs_32:
-    case RISCV::BI__builtin_riscv_predsum_hs_64:
-      ID = Intrinsic::riscv_predsum_hs;
-      break;
-    case RISCV::BI__builtin_riscv_predsum_ws:
-      ID = Intrinsic::riscv_predsum_ws;
-      break;
-    case RISCV::BI__builtin_riscv_predsumu_bs_32:
-    case RISCV::BI__builtin_riscv_predsumu_bs_64:
-      ID = Intrinsic::riscv_predsumu_bs;
-      break;
-    case RISCV::BI__builtin_riscv_predsumu_hs_32:
-    case RISCV::BI__builtin_riscv_predsumu_hs_64:
-      ID = Intrinsic::riscv_predsumu_hs;
-      break;
-    case RISCV::BI__builtin_riscv_predsumu_ws:
-      ID = Intrinsic::riscv_predsumu_ws;
-      break;
-    case RISCV::BI__builtin_riscv_psra_bs_32:
-    case RISCV::BI__builtin_riscv_psra_bs_64:
-      ID = Intrinsic::riscv_psra_bs;
-      break;
-    case RISCV::BI__builtin_riscv_psra_hs_32:
-    case RISCV::BI__builtin_riscv_psra_hs_64:
-      ID = Intrinsic::riscv_psra_hs;
-      break;
-    case RISCV::BI__builtin_riscv_psra_ws:
-      ID = Intrinsic::riscv_psra_ws;
-      break;
-    case RISCV::BI__builtin_riscv_padd_b_32:
-    case RISCV::BI__builtin_riscv_padd_b_64:
-      ID = Intrinsic::riscv_padd_b;
-      break;
-    case RISCV::BI__builtin_riscv_padd_h_32:
-    case RISCV::BI__builtin_riscv_padd_h_64:
-      ID = Intrinsic::riscv_padd_h;
-      break;
-    case RISCV::BI__builtin_riscv_padd_w:
-      ID = Intrinsic::riscv_padd_w;
-      break;
-    case RISCV::BI__builtin_riscv_psadd_b_32:
-    case RISCV::BI__builtin_riscv_psadd_b_64:
-      ID = Intrinsic::riscv_psadd_b;
-      break;
-    case RISCV::BI__builtin_riscv_psadd_h_32:
-    case RISCV::BI__builtin_riscv_psadd_h_64:
-      ID = Intrinsic::riscv_psadd_h;
-      break;
-    case RISCV::BI__builtin_riscv_psadd_w:
-      ID = Intrinsic::riscv_psadd_w;
-      break;
-    case RISCV::BI__builtin_riscv_aadd:
-      ID = Intrinsic::riscv_aadd;
-      break;
-    case RISCV::BI__builtin_riscv_paadd_b_32:
-    case RISCV::BI__builtin_riscv_paadd_b_64:
-      ID = Intrinsic::riscv_paadd_b;
-      break;
-    case RISCV::BI__builtin_riscv_paadd_h_32:
-    case RISCV::BI__builtin_riscv_paadd_h_64:
-      ID = Intrinsic::riscv_paadd_h;
-      break;
-    case RISCV::BI__builtin_riscv_paadd_w:
-      ID = Intrinsic::riscv_paadd_w;
-      break;
-    case RISCV::BI__builtin_riscv_saddu:
-      ID = Intrinsic::riscv_saddu;
-      break;
-    case RISCV::BI__builtin_riscv_psaddu_b_32:
-    case RISCV::BI__builtin_riscv_psaddu_b_64:
-      ID = Intrinsic::riscv_psaddu_b;
-      break;
-    case RISCV::BI__builtin_riscv_psaddu_h_32:
-    case RISCV::BI__builtin_riscv_psaddu_h_64:
-      ID = Intrinsic::riscv_psaddu_h;
-      break;
-    case RISCV::BI__builtin_riscv_psaddu_w:
-      ID = Intrinsic::riscv_psaddu_w;
-      break;
-    case RISCV::BI__builtin_riscv_aaddu:
-      ID = Intrinsic::riscv_aaddu;
-      break;
-    case RISCV::BI__builtin_riscv_paaddu_b_32:
-    case RISCV::BI__builtin_riscv_paaddu_b_64:
-      ID = Intrinsic::riscv_paaddu_b;
-      break;
-    case RISCV::BI__builtin_riscv_paaddu_h_32:
-    case RISCV::BI__builtin_riscv_paaddu_h_64:
-      ID = Intrinsic::riscv_paaddu_h;
-      break;
-    case RISCV::BI__builtin_riscv_paaddu_w:
-      ID = Intrinsic::riscv_paaddu_w;
-      break;
-    case RISCV::BI__builtin_riscv_psub_b_32:
-    case RISCV::BI__builtin_riscv_psub_b_64:
-      ID = Intrinsic::riscv_psub_b;
-      break;
-    case RISCV::BI__builtin_riscv_psub_h_32:
-    case RISCV::BI__builtin_riscv_psub_h_64:
-      ID = Intrinsic::riscv_psub_h;
-      break;
-    case RISCV::BI__builtin_riscv_psub_w:
-      ID = Intrinsic::riscv_psub_w;
-      break;
-    case RISCV::BI__builtin_riscv_ssub:
-      ID = Intrinsic::riscv_ssub;
-      break;
-    case RISCV::BI__builtin_riscv_pssub_b_32:
-    case RISCV::BI__builtin_riscv_pssub_b_64:
-      ID = Intrinsic::riscv_pssub_b;
-      break;
-    case RISCV::BI__builtin_riscv_pssub_h_32:
-    case RISCV::BI__builtin_riscv_pssub_h_64:
-      ID = Intrinsic::riscv_pssub_h;
-      break;
-    case RISCV::BI__builtin_riscv_pssub_w:
-      ID = Intrinsic::riscv_pssub_w;
-      break;
-    case RISCV::BI__builtin_riscv_asub:
-      ID = Intrinsic::riscv_asub;
-      break;
-    case RISCV::BI__builtin_riscv_pasub_b_32:
-    case RISCV::BI__builtin_riscv_pasub_b_64:
-      ID = Intrinsic::riscv_pasub_b;
-      break;
-    case RISCV::BI__builtin_riscv_pasub_h_32:
-    case RISCV::BI__builtin_riscv_pasub_h_64:
-      ID = Intrinsic::riscv_pasub_h;
-      break;
-    case RISCV::BI__builtin_riscv_pasub_w:
-      ID = Intrinsic::riscv_pasub_w;
-      break;
-    case RISCV::BI__builtin_riscv_ssubu:
-      ID = Intrinsic::riscv_ssubu;
-      break;
-    case RISCV::BI__builtin_riscv_pssubu_b_32:
-    case RISCV::BI__builtin_riscv_pssubu_b_64:
-      ID = Intrinsic::riscv_pssubu_b;
-      break;
-    case RISCV::BI__builtin_riscv_pssubu_h_32:
-    case RISCV::BI__builtin_riscv_pssubu_h_64:
-      ID = Intrinsic::riscv_pssubu_h;
-      break;
-    case RISCV::BI__builtin_riscv_pssubu_w:
-      ID = Intrinsic::riscv_pssubu_w;
-      break;
-    case RISCV::BI__builtin_riscv_asubu:
-      ID = Intrinsic::riscv_asubu;
-      break;
-    case RISCV::BI__builtin_riscv_pasubu_b_32:
-    case RISCV::BI__builtin_riscv_pasubu_b_64:
-      ID = Intrinsic::riscv_pasubu_b;
-      break;
-    case RISCV::BI__builtin_riscv_pasubu_h_32:
-    case RISCV::BI__builtin_riscv_pasubu_h_64:
-      ID = Intrinsic::riscv_pasubu_h;
-      break;
-    case RISCV::BI__builtin_riscv_pasubu_w:
-      ID = Intrinsic::riscv_pasubu_w;
-      break;
-    case RISCV::BI__builtin_riscv_pdif_b_32:
-    case RISCV::BI__builtin_riscv_pdif_b_64:
-      ID = Intrinsic::riscv_pdif_b;
-      break;
-    case RISCV::BI__builtin_riscv_pdif_h_32:
-    case RISCV::BI__builtin_riscv_pdif_h_64:
-      ID = Intrinsic::riscv_pdif_h;
-      break;
-    case RISCV::BI__builtin_riscv_pdifu_b_32:
-    case RISCV::BI__builtin_riscv_pdifu_b_64:
-      ID = Intrinsic::riscv_pdifu_b;
-      break;
-    case RISCV::BI__builtin_riscv_pdifu_h_32:
-    case RISCV::BI__builtin_riscv_pdifu_h_64:
-      ID = Intrinsic::riscv_pdifu_h;
-      break;
-    case RISCV::BI__builtin_riscv_mul_h01:
-      ID = Intrinsic::riscv_mul_h01;
-      break;
-    case RISCV::BI__builtin_riscv_mul_w01:
-      ID = Intrinsic::riscv_mul_w01;
-      break;
-    case RISCV::BI__builtin_riscv_mulu_h01:
-      ID = Intrinsic::riscv_mulu_h01;
-      break;
-    case RISCV::BI__builtin_riscv_mulu_w01:
-      ID = Intrinsic::riscv_mulu_w01;
-      break;
-    case RISCV::BI__builtin_riscv_slx_32:
-    case RISCV::BI__builtin_riscv_slx_64:
-      ID = Intrinsic::riscv_slx;
-      break;
-    case RISCV::BI__builtin_riscv_psh1add_h_32:
-    case RISCV::BI__builtin_riscv_psh1add_h_64:
-      ID = Intrinsic::riscv_psh1add_h;
-      break;
-    case RISCV::BI__builtin_riscv_psh1add_w:
-      ID = Intrinsic::riscv_psh1add_w;
-      break;
-    case RISCV::BI__builtin_riscv_ssh1sadd:
-      ID = Intrinsic::riscv_ssh1sadd;
-      break;
-    case RISCV::BI__builtin_riscv_pssh1sadd_h_32:
-    case RISCV::BI__builtin_riscv_pssh1sadd_h_64:
-      ID = Intrinsic::riscv_pssh1sadd_h;
-      break;
-    case RISCV::BI__builtin_riscv_pssh1sadd_w:
-      ID = Intrinsic::riscv_pssh1sadd_w;
-      break;
-    case RISCV::BI__builtin_riscv_unzip8p:
-      ID = Intrinsic::riscv_unzip8p;
-      break;
-    case RISCV::BI__builtin_riscv_unzip16p:
-      ID = Intrinsic::riscv_unzip16p;
-      break;
-    case RISCV::BI__builtin_riscv_unzip8hp:
-      ID = Intrinsic::riscv_unzip8hp;
-      break;
-    case RISCV::BI__builtin_riscv_unzip16hp:
-      ID = Intrinsic::riscv_unzip16hp;
-      break;
-    case RISCV::BI__builtin_riscv_zip8p:
-      ID = Intrinsic::riscv_zip8p;
-      break;
-    case RISCV::BI__builtin_riscv_zip16p:
-      ID = Intrinsic::riscv_zip16p;
-      break;
-    case RISCV::BI__builtin_riscv_zip8hp:
-      ID = Intrinsic::riscv_zip8hp;
-      break;
-    case RISCV::BI__builtin_riscv_zip16hp:
-      ID = Intrinsic::riscv_zip16hp;
-      break;
-    case RISCV::BI__builtin_riscv_ppack_h_32:
-    case RISCV::BI__builtin_riscv_ppack_h_64:
-      ID = Intrinsic::riscv_ppack_h;
-      break;
-    case RISCV::BI__builtin_riscv_ppack_w:
-      ID = Intrinsic::riscv_ppack_w;
-      break;
-    case RISCV::BI__builtin_riscv_ppackbt_h_32:
-    case RISCV::BI__builtin_riscv_ppackbt_h_64:
-      ID = Intrinsic::riscv_ppackbt_h;
-      break;
-    case RISCV::BI__builtin_riscv_ppackbt_w:
-      ID = Intrinsic::riscv_ppackbt_w;
-      break;
-    case RISCV::BI__builtin_riscv_packbt_32:
-    case RISCV::BI__builtin_riscv_packbt_64:
-      ID = Intrinsic::riscv_packbt;
-      break;
-    case RISCV::BI__builtin_riscv_ppacktb_h_32:
-    case RISCV::BI__builtin_riscv_ppacktb_h_64:
-      ID = Intrinsic::riscv_ppacktb_h;
-      break;
-    case RISCV::BI__builtin_riscv_ppacktb_w:
-      ID = Intrinsic::riscv_ppacktb_w;
-      break;
-    case RISCV::BI__builtin_riscv_packtb_32:
-    case RISCV::BI__builtin_riscv_packtb_64:
-      ID = Intrinsic::riscv_packtb;
-      break;
-    case RISCV::BI__builtin_riscv_ppackt_h_32:
-    case RISCV::BI__builtin_riscv_ppackt_h_64:
-      ID = Intrinsic::riscv_ppackt_h;
-      break;
-    case RISCV::BI__builtin_riscv_ppackt_w:
-      ID = Intrinsic::riscv_ppackt_w;
-      break;
-    case RISCV::BI__builtin_riscv_packt_32:
-    case RISCV::BI__builtin_riscv_packt_64:
-      ID = Intrinsic::riscv_packt;
-      break;
-    case RISCV::BI__builtin_riscv_pas_hx_32:
-    case RISCV::BI__builtin_riscv_pas_hx_64:
-      ID = Intrinsic::riscv_pas_hx;
-      break;
-    case RISCV::BI__builtin_riscv_pas_wx:
-      ID = Intrinsic::riscv_pas_wx;
-      break;
-    case RISCV::BI__builtin_riscv_psa_hx_32:
-    case RISCV::BI__builtin_riscv_psa_hx_64:
-      ID = Intrinsic::riscv_psa_hx;
-      break;
-    case RISCV::BI__builtin_riscv_psa_wx:
-      ID = Intrinsic::riscv_psa_wx;
-      break;
-    case RISCV::BI__builtin_riscv_psas_hx_32:
-    case RISCV::BI__builtin_riscv_psas_hx_64:
-      ID = Intrinsic::riscv_psas_hx;
-      break;
-    case RISCV::BI__builtin_riscv_psas_wx:
-      ID = Intrinsic::riscv_psas_wx;
-      break;
-    case RISCV::BI__builtin_riscv_pssa_hx_32:
-    case RISCV::BI__builtin_riscv_pssa_hx_64:
-      ID = Intrinsic::riscv_pssa_hx;
-      break;
-    case RISCV::BI__builtin_riscv_pssa_wx:
-      ID = Intrinsic::riscv_pssa_wx;
-      break;
-    case RISCV::BI__builtin_riscv_paas_hx_32:
-    case RISCV::BI__builtin_riscv_paas_hx_64:
-      ID = Intrinsic::riscv_paas_hx;
-      break;
-    case RISCV::BI__builtin_riscv_paas_wx:
-      ID = Intrinsic::riscv_paas_wx;
-      break;
-    case RISCV::BI__builtin_riscv_pasa_hx_32:
-    case RISCV::BI__builtin_riscv_pasa_hx_64:
-      ID = Intrinsic::riscv_pasa_hx;
-      break;
-    case RISCV::BI__builtin_riscv_pasa_wx:
-      ID = Intrinsic::riscv_pasa_wx;
-      break;
-    case RISCV::BI__builtin_riscv_mseq:
-      ID = Intrinsic::riscv_mseq;
-      break;
-    case RISCV::BI__builtin_riscv_pmseq_b_32:
-    case RISCV::BI__builtin_riscv_pmseq_b_64:
-      ID = Intrinsic::riscv_pmseq_b;
-      break;
-    case RISCV::BI__builtin_riscv_pmseq_h_32:
-    case RISCV::BI__builtin_riscv_pmseq_h_64:
-      ID = Intrinsic::riscv_pmseq_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmseq_w:
-      ID = Intrinsic::riscv_pmseq_w;
-      break;
-    case RISCV::BI__builtin_riscv_mslt:
-      ID = Intrinsic::riscv_mslt;
-      break;
-    case RISCV::BI__builtin_riscv_pmslt_b_32:
-    case RISCV::BI__builtin_riscv_pmslt_b_64:
-      ID = Intrinsic::riscv_pmslt_b;
-      break;
-    case RISCV::BI__builtin_riscv_pmslt_h_32:
-    case RISCV::BI__builtin_riscv_pmslt_h_64:
-      ID = Intrinsic::riscv_pmslt_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmslt_w:
-      ID = Intrinsic::riscv_pmslt_w;
-      break;
-    case RISCV::BI__builtin_riscv_msltu:
-      ID = Intrinsic::riscv_msltu;
-      break;
-    case RISCV::BI__builtin_riscv_pmsltu_b_32:
-    case RISCV::BI__builtin_riscv_pmsltu_b_64:
-      ID = Intrinsic::riscv_pmsltu_b;
-      break;
-    case RISCV::BI__builtin_riscv_pmsltu_h_32:
-    case RISCV::BI__builtin_riscv_pmsltu_h_64:
-      ID = Intrinsic::riscv_pmsltu_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmsltu_w:
-      ID = Intrinsic::riscv_pmsltu_w;
-      break;
-    case RISCV::BI__builtin_riscv_pmin_b_32:
-    case RISCV::BI__builtin_riscv_pmin_b_64:
-      ID = Intrinsic::riscv_pmin_b;
-      break;
-    case RISCV::BI__builtin_riscv_pmin_h_32:
-    case RISCV::BI__builtin_riscv_pmin_h_64:
-      ID = Intrinsic::riscv_pmin_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmin_w:
-      ID = Intrinsic::riscv_pmin_w;
-      break;
-    case RISCV::BI__builtin_riscv_pminu_b_32:
-    case RISCV::BI__builtin_riscv_pminu_b_64:
-      ID = Intrinsic::riscv_pminu_b;
-      break;
-    case RISCV::BI__builtin_riscv_pminu_h_32:
-    case RISCV::BI__builtin_riscv_pminu_h_64:
-      ID = Intrinsic::riscv_pminu_h;
-      break;
-    case RISCV::BI__builtin_riscv_pminu_w:
-      ID = Intrinsic::riscv_pminu_w;
-      break;
-    case RISCV::BI__builtin_riscv_pmax_b_32:
-    case RISCV::BI__builtin_riscv_pmax_b_64:
-      ID = Intrinsic::riscv_pmax_b;
-      break;
-    case RISCV::BI__builtin_riscv_pmax_h_32:
-    case RISCV::BI__builtin_riscv_pmax_h_64:
-      ID = Intrinsic::riscv_pmax_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmax_w:
-      ID = Intrinsic::riscv_pmax_w;
-      break;
-    case RISCV::BI__builtin_riscv_pmaxu_b_32:
-    case RISCV::BI__builtin_riscv_pmaxu_b_64:
-      ID = Intrinsic::riscv_pmaxu_b;
-      break;
-    case RISCV::BI__builtin_riscv_pmaxu_h_32:
-    case RISCV::BI__builtin_riscv_pmaxu_h_64:
-      ID = Intrinsic::riscv_pmaxu_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmaxu_w:
-      ID = Intrinsic::riscv_pmaxu_w;
-      break;
-    case RISCV::BI__builtin_riscv_pmulh_h_32:
-    case RISCV::BI__builtin_riscv_pmulh_h_64:
-      ID = Intrinsic::riscv_pmulh_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmulh_w:
-      ID = Intrinsic::riscv_pmulh_w;
-      break;
-    case RISCV::BI__builtin_riscv_pmulhu_h_32:
-    case RISCV::BI__builtin_riscv_pmulhu_h_64:
-      ID = Intrinsic::riscv_pmulhu_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmulhu_w:
-      ID = Intrinsic::riscv_pmulhu_w;
-      break;
-    case RISCV::BI__builtin_riscv_pmulhr_h_32:
-    case RISCV::BI__builtin_riscv_pmulhr_h_64:
-      ID = Intrinsic::riscv_pmulhr_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmulhr_w:
-      ID = Intrinsic::riscv_pmulhr_w;
-      break;
-    case RISCV::BI__builtin_riscv_pmulhru_h_32:
-    case RISCV::BI__builtin_riscv_pmulhru_h_64:
-      ID = Intrinsic::riscv_pmulhru_h;
-      break;
-    case RISCV::BI__builtin_riscv_pmulhru_w:
-      ID = Intrinsic::riscv_pmulhru_w;
-      break;
-    case RISCV::BI__builtin_riscv_mulh_h1:
-      ID = Intrinsic::riscv_mulh_h1;
-      break;
-    case RISCV::BI__builtin_riscv_mulhr:
-      ID = Intrinsic::riscv_mulhr;
-      break;
-    case RISCV::BI__builtin_riscv_mulhru:
-      ID = Intrinsic::riscv_mulhru;
-      break;
-    case RISCV::BI__builtin_riscv_mulh_h0:
-      ID = Intrinsic::riscv_mulh_h0;
-      break;
-    case RISCV::BI__builtin_riscv_sadd:
-      ID = Intrinsic::riscv_sadd;
-      break;
+  case RISCV::BI__builtin_riscv_mulh_h0: {
+      switch (BuiltinID) {
+      default: llvm_unreachable("unexpected builtin ID");
+      case RISCV::BI__builtin_riscv_psll_bs_32:
+      case RISCV::BI__builtin_riscv_psll_bs_64:
+        ID = Intrinsic::riscv_psll_bs;
+        break;
+      case RISCV::BI__builtin_riscv_psll_hs_32:
+      case RISCV::BI__builtin_riscv_psll_hs_64:
+        ID = Intrinsic::riscv_psll_hs;
+        break;
+      case RISCV::BI__builtin_riscv_psll_ws:
+        ID = Intrinsic::riscv_psll_ws;
+        break;
+      case RISCV::BI__builtin_riscv_padd_bs_32:
+      case RISCV::BI__builtin_riscv_padd_bs_64:
+        ID = Intrinsic::riscv_padd_bs;
+        break;
+      case RISCV::BI__builtin_riscv_padd_hs_32:
+      case RISCV::BI__builtin_riscv_padd_hs_64:
+        ID = Intrinsic::riscv_padd_hs;
+        break;
+      case RISCV::BI__builtin_riscv_padd_ws:
+        ID = Intrinsic::riscv_padd_ws;
+        break;
+      case RISCV::BI__builtin_riscv_sadd:
+        ID = Intrinsic::riscv_sadd;
+        break;
+      case RISCV::BI__builtin_riscv_psrl_bs_32:
+      case RISCV::BI__builtin_riscv_psrl_bs_64:
+        ID = Intrinsic::riscv_psrl_bs;
+        break;
+      case RISCV::BI__builtin_riscv_psrl_hs_32:
+      case RISCV::BI__builtin_riscv_psrl_hs_64:
+        ID = Intrinsic::riscv_psrl_hs;
+        break;
+      case RISCV::BI__builtin_riscv_psrl_ws:
+        ID = Intrinsic::riscv_psrl_ws;
+        break;
+      case RISCV::BI__builtin_riscv_predsum_bs_32:
+      case RISCV::BI__builtin_riscv_predsum_bs_64:
+        ID = Intrinsic::riscv_predsum_bs;
+        break;
+      case RISCV::BI__builtin_riscv_predsum_hs_32:
+      case RISCV::BI__builtin_riscv_predsum_hs_64:
+        ID = Intrinsic::riscv_predsum_hs;
+        break;
+      case RISCV::BI__builtin_riscv_predsum_ws:
+        ID = Intrinsic::riscv_predsum_ws;
+        break;
+      case RISCV::BI__builtin_riscv_predsumu_bs_32:
+      case RISCV::BI__builtin_riscv_predsumu_bs_64:
+        ID = Intrinsic::riscv_predsumu_bs;
+        break;
+      case RISCV::BI__builtin_riscv_predsumu_hs_32:
+      case RISCV::BI__builtin_riscv_predsumu_hs_64:
+        ID = Intrinsic::riscv_predsumu_hs;
+        break;
+      case RISCV::BI__builtin_riscv_predsumu_ws:
+        ID = Intrinsic::riscv_predsumu_ws;
+        break;
+      case RISCV::BI__builtin_riscv_psra_bs_32:
+      case RISCV::BI__builtin_riscv_psra_bs_64:
+        ID = Intrinsic::riscv_psra_bs;
+        break;
+      case RISCV::BI__builtin_riscv_psra_hs_32:
+      case RISCV::BI__builtin_riscv_psra_hs_64:
+        ID = Intrinsic::riscv_psra_hs;
+        break;
+      case RISCV::BI__builtin_riscv_psra_ws:
+        ID = Intrinsic::riscv_psra_ws;
+        break;
+      case RISCV::BI__builtin_riscv_padd_b_32:
+      case RISCV::BI__builtin_riscv_padd_b_64:
+        ID = Intrinsic::riscv_padd_b;
+        break;
+      case RISCV::BI__builtin_riscv_padd_h_32:
+      case RISCV::BI__builtin_riscv_padd_h_64:
+        ID = Intrinsic::riscv_padd_h;
+        break;
+      case RISCV::BI__builtin_riscv_padd_w:
+        ID = Intrinsic::riscv_padd_w;
+        break;
+      case RISCV::BI__builtin_riscv_psadd_b_32:
+      case RISCV::BI__builtin_riscv_psadd_b_64:
+        ID = Intrinsic::riscv_psadd_b;
+        break;
+      case RISCV::BI__builtin_riscv_psadd_h_32:
+      case RISCV::BI__builtin_riscv_psadd_h_64:
+        ID = Intrinsic::riscv_psadd_h;
+        break;
+      case RISCV::BI__builtin_riscv_psadd_w:
+        ID = Intrinsic::riscv_psadd_w;
+        break;
+      case RISCV::BI__builtin_riscv_aadd:
+        ID = Intrinsic::riscv_aadd;
+        break;
+      case RISCV::BI__builtin_riscv_paadd_b_32:
+      case RISCV::BI__builtin_riscv_paadd_b_64:
+        ID = Intrinsic::riscv_paadd_b;
+        break;
+      case RISCV::BI__builtin_riscv_paadd_h_32:
+      case RISCV::BI__builtin_riscv_paadd_h_64:
+        ID = Intrinsic::riscv_paadd_h;
+        break;
+      case RISCV::BI__builtin_riscv_paadd_w:
+        ID = Intrinsic::riscv_paadd_w;
+        break;
+      case RISCV::BI__builtin_riscv_saddu:
+        ID = Intrinsic::riscv_saddu;
+        break;
+      case RISCV::BI__builtin_riscv_psaddu_b_32:
+      case RISCV::BI__builtin_riscv_psaddu_b_64:
+        ID = Intrinsic::riscv_psaddu_b;
+        break;
+      case RISCV::BI__builtin_riscv_psaddu_h_32:
+      case RISCV::BI__builtin_riscv_psaddu_h_64:
+        ID = Intrinsic::riscv_psaddu_h;
+        break;
+      case RISCV::BI__builtin_riscv_psaddu_w:
+        ID = Intrinsic::riscv_psaddu_w;
+        break;
+      case RISCV::BI__builtin_riscv_aaddu:
+        ID = Intrinsic::riscv_aaddu;
+        break;
+      case RISCV::BI__builtin_riscv_paaddu_b_32:
+      case RISCV::BI__builtin_riscv_paaddu_b_64:
+        ID = Intrinsic::riscv_paaddu_b;
+        break;
+      case RISCV::BI__builtin_riscv_paaddu_h_32:
+      case RISCV::BI__builtin_riscv_paaddu_h_64:
+        ID = Intrinsic::riscv_paaddu_h;
+        break;
+      case RISCV::BI__builtin_riscv_paaddu_w:
+        ID = Intrinsic::riscv_paaddu_w;
+        break;
+      case RISCV::BI__builtin_riscv_psub_b_32:
+      case RISCV::BI__builtin_riscv_psub_b_64:
+        ID = Intrinsic::riscv_psub_b;
+        break;
+      case RISCV::BI__builtin_riscv_psub_h_32:
+      case RISCV::BI__builtin_riscv_psub_h_64:
+        ID = Intrinsic::riscv_psub_h;
+        break;
+      case RISCV::BI__builtin_riscv_psub_w:
+        ID = Intrinsic::riscv_psub_w;
+        break;
+      case RISCV::BI__builtin_riscv_ssub:
+        ID = Intrinsic::riscv_ssub;
+        break;
+      case RISCV::BI__builtin_riscv_pssub_b_32:
+      case RISCV::BI__builtin_riscv_pssub_b_64:
+        ID = Intrinsic::riscv_pssub_b;
+        break;
+      case RISCV::BI__builtin_riscv_pssub_h_32:
+      case RISCV::BI__builtin_riscv_pssub_h_64:
+        ID = Intrinsic::riscv_pssub_h;
+        break;
+      case RISCV::BI__builtin_riscv_pssub_w:
+        ID = Intrinsic::riscv_pssub_w;
+        break;
+      case RISCV::BI__builtin_riscv_asub:
+        ID = Intrinsic::riscv_asub;
+        break;
+      case RISCV::BI__builtin_riscv_pasub_b_32:
+      case RISCV::BI__builtin_riscv_pasub_b_64:
+        ID = Intrinsic::riscv_pasub_b;
+        break;
+      case RISCV::BI__builtin_riscv_pasub_h_32:
+      case RISCV::BI__builtin_riscv_pasub_h_64:
+        ID = Intrinsic::riscv_pasub_h;
+        break;
+      case RISCV::BI__builtin_riscv_pasub_w:
+        ID = Intrinsic::riscv_pasub_w;
+        break;
+      case RISCV::BI__builtin_riscv_ssubu:
+        ID = Intrinsic::riscv_ssubu;
+        break;
+      case RISCV::BI__builtin_riscv_pssubu_b_32:
+      case RISCV::BI__builtin_riscv_pssubu_b_64:
+        ID = Intrinsic::riscv_pssubu_b;
+        break;
+      case RISCV::BI__builtin_riscv_pssubu_h_32:
+      case RISCV::BI__builtin_riscv_pssubu_h_64:
+        ID = Intrinsic::riscv_pssubu_h;
+        break;
+      case RISCV::BI__builtin_riscv_pssubu_w:
+        ID = Intrinsic::riscv_pssubu_w;
+        break;
+      case RISCV::BI__builtin_riscv_asubu:
+        ID = Intrinsic::riscv_asubu;
+        break;
+      case RISCV::BI__builtin_riscv_pasubu_b_32:
+      case RISCV::BI__builtin_riscv_pasubu_b_64:
+        ID = Intrinsic::riscv_pasubu_b;
+        break;
+      case RISCV::BI__builtin_riscv_pasubu_h_32:
+      case RISCV::BI__builtin_riscv_pasubu_h_64:
+        ID = Intrinsic::riscv_pasubu_h;
+        break;
+      case RISCV::BI__builtin_riscv_pasubu_w:
+        ID = Intrinsic::riscv_pasubu_w;
+        break;
+      case RISCV::BI__builtin_riscv_pdif_b_32:
+      case RISCV::BI__builtin_riscv_pdif_b_64:
+        ID = Intrinsic::riscv_pdif_b;
+        break;
+      case RISCV::BI__builtin_riscv_pdif_h_32:
+      case RISCV::BI__builtin_riscv_pdif_h_64:
+        ID = Intrinsic::riscv_pdif_h;
+        break;
+      case RISCV::BI__builtin_riscv_pdifu_b_32:
+      case RISCV::BI__builtin_riscv_pdifu_b_64:
+        ID = Intrinsic::riscv_pdifu_b;
+        break;
+      case RISCV::BI__builtin_riscv_pdifu_h_32:
+      case RISCV::BI__builtin_riscv_pdifu_h_64:
+        ID = Intrinsic::riscv_pdifu_h;
+        break;
+      case RISCV::BI__builtin_riscv_mul_h01:
+        ID = Intrinsic::riscv_mul_h01;
+        break;
+      case RISCV::BI__builtin_riscv_mul_w01:
+        ID = Intrinsic::riscv_mul_w01;
+        break;
+      case RISCV::BI__builtin_riscv_mulu_h01:
+        ID = Intrinsic::riscv_mulu_h01;
+        break;
+      case RISCV::BI__builtin_riscv_mulu_w01:
+        ID = Intrinsic::riscv_mulu_w01;
+        break;
+      case RISCV::BI__builtin_riscv_slx_32:
+      case RISCV::BI__builtin_riscv_slx_64:
+        ID = Intrinsic::riscv_slx;
+        break;
+      case RISCV::BI__builtin_riscv_psh1add_h_32:
+      case RISCV::BI__builtin_riscv_psh1add_h_64:
+        ID = Intrinsic::riscv_psh1add_h;
+        break;
+      case RISCV::BI__builtin_riscv_psh1add_w:
+        ID = Intrinsic::riscv_psh1add_w;
+        break;
+      case RISCV::BI__builtin_riscv_ssh1sadd:
+        ID = Intrinsic::riscv_ssh1sadd;
+        break;
+      case RISCV::BI__builtin_riscv_pssh1sadd_h_32:
+      case RISCV::BI__builtin_riscv_pssh1sadd_h_64:
+        ID = Intrinsic::riscv_pssh1sadd_h;
+        break;
+      case RISCV::BI__builtin_riscv_pssh1sadd_w:
+        ID = Intrinsic::riscv_pssh1sadd_w;
+        break;
+      case RISCV::BI__builtin_riscv_unzip8p:
+        ID = Intrinsic::riscv_unzip8p;
+        break;
+      case RISCV::BI__builtin_riscv_unzip16p:
+        ID = Intrinsic::riscv_unzip16p;
+        break;
+      case RISCV::BI__builtin_riscv_unzip8hp:
+        ID = Intrinsic::riscv_unzip8hp;
+        break;
+      case RISCV::BI__builtin_riscv_unzip16hp:
+        ID = Intrinsic::riscv_unzip16hp;
+        break;
+      case RISCV::BI__builtin_riscv_zip8p:
+        ID = Intrinsic::riscv_zip8p;
+        break;
+      case RISCV::BI__builtin_riscv_zip16p:
+        ID = Intrinsic::riscv_zip16p;
+        break;
+      case RISCV::BI__builtin_riscv_zip8hp:
+        ID = Intrinsic::riscv_zip8hp;
+        break;
+      case RISCV::BI__builtin_riscv_zip16hp:
+        ID = Intrinsic::riscv_zip16hp;
+        break;
+      case RISCV::BI__builtin_riscv_ppack_h_32:
+      case RISCV::BI__builtin_riscv_ppack_h_64:
+        ID = Intrinsic::riscv_ppack_h;
+        break;
+      case RISCV::BI__builtin_riscv_ppack_w:
+        ID = Intrinsic::riscv_ppack_w;
+        break;
+      case RISCV::BI__builtin_riscv_ppackbt_h_32:
+      case RISCV::BI__builtin_riscv_ppackbt_h_64:
+        ID = Intrinsic::riscv_ppackbt_h;
+        break;
+      case RISCV::BI__builtin_riscv_ppackbt_w:
+        ID = Intrinsic::riscv_ppackbt_w;
+        break;
+      case RISCV::BI__builtin_riscv_packbt_32:
+      case RISCV::BI__builtin_riscv_packbt_64:
+        ID = Intrinsic::riscv_packbt;
+        break;
+      case RISCV::BI__builtin_riscv_ppacktb_h_32:
+      case RISCV::BI__builtin_riscv_ppacktb_h_64:
+        ID = Intrinsic::riscv_ppacktb_h;
+        break;
+      case RISCV::BI__builtin_riscv_ppacktb_w:
+        ID = Intrinsic::riscv_ppacktb_w;
+        break;
+      case RISCV::BI__builtin_riscv_packtb_32:
+      case RISCV::BI__builtin_riscv_packtb_64:
+        ID = Intrinsic::riscv_packtb;
+        break;
+      case RISCV::BI__builtin_riscv_ppackt_h_32:
+      case RISCV::BI__builtin_riscv_ppackt_h_64:
+        ID = Intrinsic::riscv_ppackt_h;
+        break;
+      case RISCV::BI__builtin_riscv_ppackt_w:
+        ID = Intrinsic::riscv_ppackt_w;
+        break;
+      case RISCV::BI__builtin_riscv_packt_32:
+      case RISCV::BI__builtin_riscv_packt_64:
+        ID = Intrinsic::riscv_packt;
+        break;
+      case RISCV::BI__builtin_riscv_pas_hx_32:
+      case RISCV::BI__builtin_riscv_pas_hx_64:
+        ID = Intrinsic::riscv_pas_hx;
+        break;
+      case RISCV::BI__builtin_riscv_pas_wx:
+        ID = Intrinsic::riscv_pas_wx;
+        break;
+      case RISCV::BI__builtin_riscv_psa_hx_32:
+      case RISCV::BI__builtin_riscv_psa_hx_64:
+        ID = Intrinsic::riscv_psa_hx;
+        break;
+      case RISCV::BI__builtin_riscv_psa_wx:
+        ID = Intrinsic::riscv_psa_wx;
+        break;
+      case RISCV::BI__builtin_riscv_psas_hx_32:
+      case RISCV::BI__builtin_riscv_psas_hx_64:
+        ID = Intrinsic::riscv_psas_hx;
+        break;
+      case RISCV::BI__builtin_riscv_psas_wx:
+        ID = Intrinsic::riscv_psas_wx;
+        break;
+      case RISCV::BI__builtin_riscv_pssa_hx_32:
+      case RISCV::BI__builtin_riscv_pssa_hx_64:
+        ID = Intrinsic::riscv_pssa_hx;
+        break;
+      case RISCV::BI__builtin_riscv_pssa_wx:
+        ID = Intrinsic::riscv_pssa_wx;
+        break;
+      case RISCV::BI__builtin_riscv_paas_hx_32:
+      case RISCV::BI__builtin_riscv_paas_hx_64:
+        ID = Intrinsic::riscv_paas_hx;
+        break;
+      case RISCV::BI__builtin_riscv_paas_wx:
+        ID = Intrinsic::riscv_paas_wx;
+        break;
+      case RISCV::BI__builtin_riscv_pasa_hx_32:
+      case RISCV::BI__builtin_riscv_pasa_hx_64:
+        ID = Intrinsic::riscv_pasa_hx;
+        break;
+      case RISCV::BI__builtin_riscv_pasa_wx:
+        ID = Intrinsic::riscv_pasa_wx;
+        break;
+      case RISCV::BI__builtin_riscv_mseq:
+        ID = Intrinsic::riscv_mseq;
+        break;
+      case RISCV::BI__builtin_riscv_pmseq_b_32:
+      case RISCV::BI__builtin_riscv_pmseq_b_64:
+        ID = Intrinsic::riscv_pmseq_b;
+        break;
+      case RISCV::BI__builtin_riscv_pmseq_h_32:
+      case RISCV::BI__builtin_riscv_pmseq_h_64:
+        ID = Intrinsic::riscv_pmseq_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmseq_w:
+        ID = Intrinsic::riscv_pmseq_w;
+        break;
+      case RISCV::BI__builtin_riscv_mslt:
+        ID = Intrinsic::riscv_mslt;
+        break;
+      case RISCV::BI__builtin_riscv_pmslt_b_32:
+      case RISCV::BI__builtin_riscv_pmslt_b_64:
+        ID = Intrinsic::riscv_pmslt_b;
+        break;
+      case RISCV::BI__builtin_riscv_pmslt_h_32:
+      case RISCV::BI__builtin_riscv_pmslt_h_64:
+        ID = Intrinsic::riscv_pmslt_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmslt_w:
+        ID = Intrinsic::riscv_pmslt_w;
+        break;
+      case RISCV::BI__builtin_riscv_msltu:
+        ID = Intrinsic::riscv_msltu;
+        break;
+      case RISCV::BI__builtin_riscv_pmsltu_b_32:
+      case RISCV::BI__builtin_riscv_pmsltu_b_64:
+        ID = Intrinsic::riscv_pmsltu_b;
+        break;
+      case RISCV::BI__builtin_riscv_pmsltu_h_32:
+      case RISCV::BI__builtin_riscv_pmsltu_h_64:
+        ID = Intrinsic::riscv_pmsltu_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmsltu_w:
+        ID = Intrinsic::riscv_pmsltu_w;
+        break;
+      case RISCV::BI__builtin_riscv_pmin_b_32:
+      case RISCV::BI__builtin_riscv_pmin_b_64:
+        ID = Intrinsic::riscv_pmin_b;
+        break;
+      case RISCV::BI__builtin_riscv_pmin_h_32:
+      case RISCV::BI__builtin_riscv_pmin_h_64:
+        ID = Intrinsic::riscv_pmin_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmin_w:
+        ID = Intrinsic::riscv_pmin_w;
+        break;
+      case RISCV::BI__builtin_riscv_pminu_b_32:
+      case RISCV::BI__builtin_riscv_pminu_b_64:
+        ID = Intrinsic::riscv_pminu_b;
+        break;
+      case RISCV::BI__builtin_riscv_pminu_h_32:
+      case RISCV::BI__builtin_riscv_pminu_h_64:
+        ID = Intrinsic::riscv_pminu_h;
+        break;
+      case RISCV::BI__builtin_riscv_pminu_w:
+        ID = Intrinsic::riscv_pminu_w;
+        break;
+      case RISCV::BI__builtin_riscv_pmax_b_32:
+      case RISCV::BI__builtin_riscv_pmax_b_64:
+        ID = Intrinsic::riscv_pmax_b;
+        break;
+      case RISCV::BI__builtin_riscv_pmax_h_32:
+      case RISCV::BI__builtin_riscv_pmax_h_64:
+        ID = Intrinsic::riscv_pmax_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmax_w:
+        ID = Intrinsic::riscv_pmax_w;
+        break;
+      case RISCV::BI__builtin_riscv_pmaxu_b_32:
+      case RISCV::BI__builtin_riscv_pmaxu_b_64:
+        ID = Intrinsic::riscv_pmaxu_b;
+        break;
+      case RISCV::BI__builtin_riscv_pmaxu_h_32:
+      case RISCV::BI__builtin_riscv_pmaxu_h_64:
+        ID = Intrinsic::riscv_pmaxu_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmaxu_w:
+        ID = Intrinsic::riscv_pmaxu_w;
+        break;
+      case RISCV::BI__builtin_riscv_pmulh_h_32:
+      case RISCV::BI__builtin_riscv_pmulh_h_64:
+        ID = Intrinsic::riscv_pmulh_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmulh_w:
+        ID = Intrinsic::riscv_pmulh_w;
+        break;
+      case RISCV::BI__builtin_riscv_pmulhu_h_32:
+      case RISCV::BI__builtin_riscv_pmulhu_h_64:
+        ID = Intrinsic::riscv_pmulhu_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmulhu_w:
+        ID = Intrinsic::riscv_pmulhu_w;
+        break;
+      case RISCV::BI__builtin_riscv_pmulhr_h_32:
+      case RISCV::BI__builtin_riscv_pmulhr_h_64:
+        ID = Intrinsic::riscv_pmulhr_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmulhr_w:
+        ID = Intrinsic::riscv_pmulhr_w;
+        break;
+      case RISCV::BI__builtin_riscv_pmulhru_h_32:
+      case RISCV::BI__builtin_riscv_pmulhru_h_64:
+        ID = Intrinsic::riscv_pmulhru_h;
+        break;
+      case RISCV::BI__builtin_riscv_pmulhru_w:
+        ID = Intrinsic::riscv_pmulhru_w;
+        break;
+      case RISCV::BI__builtin_riscv_mulh_h1:
+        ID = Intrinsic::riscv_mulh_h1;
+        break;
+      case RISCV::BI__builtin_riscv_mulhr:
+        ID = Intrinsic::riscv_mulhr;
+        break;
+      case RISCV::BI__builtin_riscv_mulhru:
+        ID = Intrinsic::riscv_mulhru;
+        break;
+      case RISCV::BI__builtin_riscv_mulh_h0:
+        ID = Intrinsic::riscv_mulh_h0;
+        break;
     }
 
-    IntrinsicTypes = {ResultType};
-    break;
+    IntrinsicTypes = {Ops[0]->getType()};
+    break;
   }
 
+  // Intrinsic type is obtained from Result and Ops[1].
   case RISCV::BI__builtin_riscv_pslli_b_32:
   case RISCV::BI__builtin_riscv_pslli_b_64:
   case RISCV::BI__builtin_riscv_pslli_h_32:
@@ -956,6 +966,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_psslai_h_32:
   case RISCV::BI__builtin_riscv_psslai_h_64:
   case RISCV::BI__builtin_riscv_psslai_w:
+  case RISCV::BI__builtin_riscv_sslai:
   case RISCV::BI__builtin_riscv_pusati_h_32:
   case RISCV::BI__builtin_riscv_pusati_h_64:
   case RISCV::BI__builtin_riscv_pusati_w:
@@ -981,8 +992,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pmulhrsu_w:
   case RISCV::BI__builtin_riscv_mulhsu_h0:
   case RISCV::BI__builtin_riscv_mulhsu_h1:
-  case RISCV::BI__builtin_riscv_mulhrsu:
-  case RISCV::BI__builtin_riscv_sslai: {
+  case RISCV::BI__builtin_riscv_mulhrsu: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
     case RISCV::BI__builtin_riscv_pslli_b_32:
@@ -1067,16 +1077,18 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
       ID = Intrinsic::riscv_mulhrsu;
       break;
     }
+
     IntrinsicTypes = {ResultType, Ops[1]->getType()};
     break;
   }
 
-
+  // Intrinsic type is obtained from Result and Ops[0].
   case RISCV::BI__builtin_riscv_pmul_h_b01_32:
   case RISCV::BI__builtin_riscv_pmul_h_b01_64:
   case RISCV::BI__builtin_riscv_pmul_w_h01:
   case RISCV::BI__builtin_riscv_pmulu_h_b01_32:
   case RISCV::BI__builtin_riscv_pmulu_h_b01_64:
+  case RISCV::BI__builtin_riscv_pmulu_w_h01:
   case RISCV::BI__builtin_riscv_pmul_h_b00_32:
   case RISCV::BI__builtin_riscv_pmul_h_b00_64:
   case RISCV::BI__builtin_riscv_pmul_w_h00:
@@ -1102,8 +1114,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pmulh_w_h0:
   case RISCV::BI__builtin_riscv_pmulh_h_b1_32:
   case RISCV::BI__builtin_riscv_pmulh_h_b1_64:
-  case RISCV::BI__builtin_riscv_pmulh_w_h1:
-  case RISCV::BI__builtin_riscv_pmulu_w_h01: {
+  case RISCV::BI__builtin_riscv_pmulh_w_h1: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
     case RISCV::BI__builtin_riscv_pmul_h_b01_32:
@@ -1187,6 +1198,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
       ID = Intrinsic::riscv_pmulh_w_h1;
       break;
     }
+
     IntrinsicTypes = {ResultType, Ops[0]->getType()};
     break;
   }
@@ -1200,6 +1212,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_mulsu_h00:
   case RISCV::BI__builtin_riscv_mulsu_w00:
   case RISCV::BI__builtin_riscv_mulsu_h11:
+  case RISCV::BI__builtin_riscv_mulsu_w11:
   case RISCV::BI__builtin_riscv_pmulhsu_h_32:
   case RISCV::BI__builtin_riscv_pmulhsu_h_64:
   case RISCV::BI__builtin_riscv_pmulhsu_w:
@@ -1208,8 +1221,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_pmulhsu_w_h0:
   case RISCV::BI__builtin_riscv_pmulhsu_h_b1_32:
   case RISCV::BI__builtin_riscv_pmulhsu_h_b1_64:
-  case RISCV::BI__builtin_riscv_pmulhsu_w_h1:
-  case RISCV::BI__builtin_riscv_mulsu_w11: {
+  case RISCV::BI__builtin_riscv_pmulhsu_w_h1: {
     switch (BuiltinID) {
     default: llvm_unreachable("unexpected builtin ID");
     case RISCV::BI__builtin_riscv_pmulsu_h_b00_32:
@@ -1260,6 +1272,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
       ID = Intrinsic::riscv_pmulhsu_w_h1;
       break;
     }
+
     IntrinsicTypes = {ResultType, Ops[0]->getType(), Ops[1]->getType()};
     break;
   }
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 99a53f33dc872..af49c244dc035 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -2135,20 +2135,20 @@ let TargetPrefix = "riscv" in {
     def "int_riscv_" # NAME   : RVPBinaryABCIntrinsics;
   }
 
-  defm pmulsu_h_b00      : RVPBinaryABCIntrinsics;
-  defm pmulsu_w_h00      : RVPBinaryABCIntrinsics;
-  defm pmulsu_h_b11      : RVPBinaryABCIntrinsics;
-  defm pmulsu_w_h11      : RVPBinaryABCIntrinsics;
-  defm mulsu_h00         : RVPBinaryABCIntrinsics;
-  defm mulsu_w00         : RVPBinaryABCIntrinsics;
-  defm mulsu_h11         : RVPBinaryABCIntrinsics;
-  defm mulsu_w11         : RVPBinaryABCIntrinsics;
-  defm pmulhsu_h         : RVPBinaryABCIntrinsics;
-  defm pmulhsu_w         : RVPBinaryABCIntrinsics;
-  defm pmulhsu_h_b0      : RVPBinaryABCIntrinsics;
-  defm pmulhsu_w_h0      : RVPBinaryABCIntrinsics;
-  defm pmulhsu_h_b1      : RVPBinaryABCIntrinsics;
-  defm pmulhsu_w_h1      : RVPBinaryABCIntrinsics;
+  defm pmulsu_h_b00 : RVPBinaryABCIntrinsics;
+  defm pmulsu_w_h00 : RVPBinaryABCIntrinsics;
+  defm pmulsu_h_b11 : RVPBinaryABCIntrinsics;
+  defm pmulsu_w_h11 : RVPBinaryABCIntrinsics;
+  defm mulsu_h00    : RVPBinaryABCIntrinsics;
+  defm mulsu_w00    : RVPBinaryABCIntrinsics;
+  defm mulsu_h11    : RVPBinaryABCIntrinsics;
+  defm mulsu_w11    : RVPBinaryABCIntrinsics;
+  defm pmulhsu_h    : RVPBinaryABCIntrinsics;
+  defm pmulhsu_w    : RVPBinaryABCIntrinsics;
+  defm pmulhsu_h_b0 : RVPBinaryABCIntrinsics;
+  defm pmulhsu_w_h0 : RVPBinaryABCIntrinsics;
+  defm pmulhsu_h_b1 : RVPBinaryABCIntrinsics;
+  defm pmulhsu_w_h1 : RVPBinaryABCIntrinsics;
 } // TargetPrefix = "riscv"
 
 // Vendor extensions



More information about the cfe-commits mailing list