[clang] [llvm] [RISCV] Implement Clang Builtins for XCValu Extension in CV32E40P (PR #100684)

via cfe-commits cfe-commits at lists.llvm.org
Mon Sep 30 02:35:49 PDT 2024


https://github.com/realqhc updated https://github.com/llvm/llvm-project/pull/100684

>From 2ea18ade66163a6f3b0d8a454f5c11bb7f99d888 Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Fri, 26 Jul 2024 12:26:16 +1000
Subject: [PATCH 1/8] [RISCV] Implement Clang Builtins for XCValu Extension in
 CV32E40P

This commit adds the Clang Builtins, C API header and relevant tests for XCValu extension.

Spec: https://github.com/openhwgroup/core-v-sw/blob/master/specifications/corev-builtin-spec.md

Contributor: @melonedo, @PaoloS02
---
 .../include/clang/Basic/BuiltinsRISCVXCV.def  |  41 +++
 clang/include/clang/Basic/TargetBuiltins.h    |  10 +
 clang/include/module.modulemap                |   1 +
 clang/lib/Basic/Targets/RISCV.cpp             |   9 +-
 clang/lib/CodeGen/CGBuiltin.cpp               |  31 ++
 clang/lib/Headers/CMakeLists.txt              |   1 +
 clang/lib/Headers/riscv_corev_alu.h           | 128 ++++++++
 clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c | 126 ++++++++
 clang/test/CodeGen/RISCV/riscv-xcvalu.c       | 303 ++++++++++++++++++
 llvm/include/llvm/IR/IntrinsicsRISCVXCV.td    |  26 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td    |  27 +-
 llvm/test/CodeGen/RISCV/xcvalu.ll             | 222 +++++++++----
 12 files changed, 852 insertions(+), 73 deletions(-)
 create mode 100644 clang/include/clang/Basic/BuiltinsRISCVXCV.def
 create mode 100644 clang/lib/Headers/riscv_corev_alu.h
 create mode 100644 clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
 create mode 100644 clang/test/CodeGen/RISCV/riscv-xcvalu.c

diff --git a/clang/include/clang/Basic/BuiltinsRISCVXCV.def b/clang/include/clang/Basic/BuiltinsRISCVXCV.def
new file mode 100644
index 00000000000000..29c59cdf005d03
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsRISCVXCV.def
@@ -0,0 +1,41 @@
+//==- BuiltinsRISCVXCV.def - RISC-V CORE-V Builtin database ----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CORE-V-specific builtin function database.  Users of
+// this file must define the BUILTIN macro to make use of this information.
+//
+//===----------------------------------------------------------------------===//
+
+#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
+#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
+#endif
+
+TARGET_BUILTIN(alu_slet, "ZiZiZi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_sletu, "ZiUZiUZi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_min, "ZiZiZi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_minu, "UZiUZiUZi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_max, "ZiZiZi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_maxu, "UZiUZiUZi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_exths, "Zis", "nc", "xcvalu")
+TARGET_BUILTIN(alu_exthz, "UZiUs", "nc", "xcvalu")
+TARGET_BUILTIN(alu_extbs, "Zic", "nc", "xcvalu")
+TARGET_BUILTIN(alu_extbz, "UZiUc", "nc", "xcvalu")
+
+TARGET_BUILTIN(alu_clip, "ZiZiUZi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_clipu, "UZiUZiUZi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_addN, "ZiZiUZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_adduN, "UZiUZiUZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_addRN, "ZiZiZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_adduRN, "UZiUZiUZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_subN, "ZiZiUZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_subuN, "UZiUZiUZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_subRN, "ZiZiZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_subuRN, "UZiUZiUZiUc", "nc", "xcvalu")
+
+#undef BUILTIN
+#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index d0f41b17c154f3..f25fa22a95a1ed 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -152,12 +152,22 @@ namespace clang {
   };
   }
 
+  namespace RISCVXCV {
+  enum {
+    LastRVVBuiltin = RISCVVector::FirstTSBuiltin - 1,
+#define BUILTIN(ID, TYPE, ATTRS) BI__builtin_riscv_cv_##ID,
+#include "clang/Basic/BuiltinsRISCVXCV.def"
+    FirstTSBuiltin,
+  };
+  } // namespace RISCVXCV
+
   /// RISCV builtins
   namespace RISCV {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
     FirstRVVBuiltin = clang::Builtin::FirstTSBuiltin,
     LastRVVBuiltin = RISCVVector::FirstTSBuiltin - 1,
+    LastXCVBuiltin = RISCVXCV::FirstTSBuiltin - 1,
 #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
 #include "clang/Basic/BuiltinsRISCV.inc"
     LastTSBuiltin
diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap
index b6ab99bb85d8a7..7b177db4505e5c 100644
--- a/clang/include/module.modulemap
+++ b/clang/include/module.modulemap
@@ -55,6 +55,7 @@ module Clang_Basic {
   textual header "clang/Basic/BuiltinsNEON.def"
   textual header "clang/Basic/BuiltinsNVPTX.def"
   textual header "clang/Basic/BuiltinsPPC.def"
+  textual header "clang/Basic/BuiltinsRISCVXCV.def"
   textual header "clang/Basic/BuiltinsRISCVVector.def"
   textual header "clang/Basic/BuiltinsSME.def"
   textual header "clang/Basic/BuiltinsSVE.def"
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index b6ea4440507ea1..03e4b07e12305f 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -233,7 +233,14 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #include "clang/Basic/BuiltinsRISCVVector.def"
 #define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+  {"__builtin_riscv_cv_" #ID, TYPE,         ATTRS, nullptr,                    \
+   HeaderDesc::NO_HEADER,     ALL_LANGUAGES},
+#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
+  {"__builtin_riscv_cv_" #ID, TYPE,         ATTRS, FEATURE,                    \
+   HeaderDesc::NO_HEADER,     ALL_LANGUAGES},
+#include "clang/Basic/BuiltinsRISCVXCV.def"
+#define BUILTIN(ID, TYPE, ATTRS)                                               \
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #include "clang/Basic/BuiltinsRISCV.inc"
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9033cd1ccd781d..b0dfc12a0f559b 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -22114,6 +22114,30 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
   return nullptr;
 }
 
+static Value *EmitXCVIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
+                               unsigned IntrinsicID,
+                               MutableArrayRef<Value *> Ops,
+                               const CallExpr *E) {
+  llvm::Type *MachineType =
+      llvm::IntegerType::getInt32Ty(CGF.CGM.getLLVMContext());
+  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
+    if (Ops[i]->getType() != MachineType) {
+      QualType type = E->getArg(i)->getType();
+      assert((type->isSignedIntegerType() || type->isUnsignedIntegerType() ||
+              type->isPointerType()) &&
+             "Argument of Core-V builtin must have signed or unsigned integer "
+             "or Pointer type");
+      if (type->isSignedIntegerType()) {
+        Ops[i] = CGF.Builder.CreateSExt(Ops[i], MachineType);
+      } else if ((type->isUnsignedIntegerType())) {
+        Ops[i] = CGF.Builder.CreateZExt(Ops[i], MachineType);
+      }
+    }
+  }
+  llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
+  return CGF.Builder.CreateCall(F, Ops);
+}
+
 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E,
                                              ReturnValueSlot ReturnValue) {
@@ -22340,6 +22364,13 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     return Store;
   }
 
+// Core-V
+#define BUILTIN(NAME, TYPE, ATTRS)                                             \
+  case RISCVXCV::BI__builtin_riscv_cv_##NAME:                                  \
+    ID = Intrinsic::riscv_cv_##NAME;                                           \
+    return EmitXCVIntrinsic(*this, BuiltinID, ID, Ops, E);
+#include "clang/Basic/BuiltinsRISCVXCV.def"
+
   // Vector builtins are handled from here.
 #include "clang/Basic/riscv_vector_builtin_cg.inc"
   // SiFive Vector builtins are handled from here.
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index f5cc07c303f9eb..a2ea1d80175a3a 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -123,6 +123,7 @@ set(riscv_files
   riscv_crypto.h
   riscv_ntlh.h
   sifive_vector.h
+  riscv_corev_alu.h
   )
 
 set(systemz_files
diff --git a/clang/lib/Headers/riscv_corev_alu.h b/clang/lib/Headers/riscv_corev_alu.h
new file mode 100644
index 00000000000000..d6230022579b8e
--- /dev/null
+++ b/clang/lib/Headers/riscv_corev_alu.h
@@ -0,0 +1,128 @@
+/*===---- riscv_corev_alu.h - CORE-V ALU intrinsics ------------------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __RISCV_COREV_ALU_H
+#define __RISCV_COREV_ALU_H
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#if defined(__riscv_xcvalu)
+
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_abs(long a) {
+  return __builtin_abs(a);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_slet(long a, long b) {
+  return __builtin_riscv_cv_alu_slet(a, b);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_sletu(unsigned long a, unsigned long b) {
+  return __builtin_riscv_cv_alu_sletu(a, b);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_min(long a, long b) {
+  return __builtin_riscv_cv_alu_min(a, b);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_minu(unsigned long a, unsigned long b) {
+  return __builtin_riscv_cv_alu_minu(a, b);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_max(long a, long b) {
+  return __builtin_riscv_cv_alu_max(a, b);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_maxu(unsigned long a, unsigned long b) {
+  return __builtin_riscv_cv_alu_maxu(a, b);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_exths(int16_t a) {
+  return __builtin_riscv_cv_alu_exths(a);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_exthz(uint16_t a) {
+  return __builtin_riscv_cv_alu_exthz(a);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_extbs(int8_t a) {
+  return __builtin_riscv_cv_alu_extbs(a);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_extbz(uint8_t a) {
+  return __builtin_riscv_cv_alu_extbz(a);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_clip(long a,
+                                                              unsigned long b) {
+  return __builtin_riscv_cv_alu_clip(a, b);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_clipu(unsigned long a, unsigned long b) {
+  return __builtin_riscv_cv_alu_clipu(a, b);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_addN(long a, long b,
+                                                              uint8_t shft) {
+  return __builtin_riscv_cv_alu_addN(a, b, shft);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_adduN(unsigned long a, unsigned long b, uint8_t shft) {
+  return __builtin_riscv_cv_alu_adduN(a, b, shft);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_addRN(long a, long b,
+                                                               uint8_t shft) {
+  return __builtin_riscv_cv_alu_addRN(a, b, shft);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_adduRN(unsigned long a, unsigned long b, uint8_t shft) {
+  return __builtin_riscv_cv_alu_adduRN(a, b, shft);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_subN(long a, long b,
+                                                              uint8_t shft) {
+  return __builtin_riscv_cv_alu_subN(a, b, shft);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_subuN(unsigned long a, unsigned long b, uint8_t shft) {
+  return __builtin_riscv_cv_alu_subuN(a, b, shft);
+}
+
+static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_subRN(long a, long b,
+                                                               uint8_t shft) {
+  return __builtin_riscv_cv_alu_subRN(a, b, shft);
+}
+
+static __inline__ unsigned long __DEFAULT_FN_ATTRS
+__riscv_cv_alu_subuRN(unsigned long a, unsigned long b, uint8_t shft) {
+  return __builtin_riscv_cv_alu_subuRN(a, b, shft);
+}
+
+#endif // defined(__riscv_xcvalu)
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif // define __RISCV_COREV_ALU_H
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
new file mode 100644
index 00000000000000..d5c16fbe3d7663
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
@@ -0,0 +1,126 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv32 -target-feature +xcvalu -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+
+#include <stdint.h>
+#include <riscv_corev_alu.h>
+
+// CHECK-LABEL: @test_alu_slet
+// CHECK: @llvm.riscv.cv.alu.slet
+int test_alu_slet(int32_t a, int32_t b) {
+  return __riscv_cv_alu_slet(a, b);
+}
+
+// CHECK-LABEL: @test_alu_sletu
+// CHECK: @llvm.riscv.cv.alu.sletu
+int test_alu_sletu(uint32_t a, uint32_t b) {
+  return __riscv_cv_alu_sletu(a, b);
+}
+
+// CHECK-LABEL: @test_alu_min
+// CHECK: @llvm.riscv.cv.alu.min
+int test_alu_min(int32_t a, int32_t b) {
+  return __riscv_cv_alu_min(a, b);
+}
+
+// CHECK-LABEL: @test_alu_minu
+// CHECK: @llvm.riscv.cv.alu.minu
+int test_alu_minu(uint32_t a, uint32_t b) {
+  return __riscv_cv_alu_minu(a, b);
+}
+
+// CHECK-LABEL: @test_alu_max
+// CHECK: @llvm.riscv.cv.alu.max
+int test_alu_max(int32_t a, int32_t b) {
+  return __riscv_cv_alu_max(a, b);
+}
+
+// CHECK-LABEL: @test_alu_maxu
+// CHECK: @llvm.riscv.cv.alu.maxu
+int test_alu_maxu(uint32_t a, uint32_t b) {
+  return __riscv_cv_alu_maxu(a, b);
+}
+
+// CHECK-LABEL: @test_alu_exths
+// CHECK: @llvm.riscv.cv.alu.exths
+int test_alu_exths(int16_t a) {
+  return __riscv_cv_alu_exths(a);
+}
+
+// CHECK-LABEL: @test_alu_exthz
+// CHECK: @llvm.riscv.cv.alu.exthz
+int test_alu_exthz(uint16_t a) {
+  return __riscv_cv_alu_exthz(a);
+}
+
+// CHECK-LABEL: @test_alu_extbs
+// CHECK: @llvm.riscv.cv.alu.extbs
+int test_alu_extbs(int8_t a) {
+  return __riscv_cv_alu_extbs(a);
+}
+
+// CHECK-LABEL: @test_alu_extbz
+// CHECK: @llvm.riscv.cv.alu.extbz
+int test_alu_extbz(uint8_t a) {
+  return __riscv_cv_alu_extbz(a);
+}
+
+// CHECK-LABEL: @test_alu_clip
+// CHECK: @llvm.riscv.cv.alu.clip
+int test_alu_clip(int32_t a) {
+  return __riscv_cv_alu_clip(a, 0);
+}
+
+// CHECK-LABEL: @test_alu_clipu
+// CHECK: @llvm.riscv.cv.alu.clipu
+int test_alu_clipu(uint32_t a) {
+  return __riscv_cv_alu_clipu(a, 0);
+}
+
+// CHECK-LABEL: @test_alu_addN
+// CHECK: @llvm.riscv.cv.alu.addN
+int test_alu_addN(int32_t a, int32_t b) {
+  return __riscv_cv_alu_addN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_adduN
+// CHECK: @llvm.riscv.cv.alu.adduN
+int test_alu_adduN(uint32_t a, uint32_t b) {
+  return __riscv_cv_alu_adduN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_addRN
+// CHECK: @llvm.riscv.cv.alu.addRN
+int test_alu_addRN(int32_t a, int32_t b) {
+  return __riscv_cv_alu_addRN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_adduRN
+// CHECK: @llvm.riscv.cv.alu.adduRN
+int test_alu_adduRN(uint32_t a, uint32_t b) {
+  return __riscv_cv_alu_adduRN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_subN
+// CHECK: @llvm.riscv.cv.alu.subN
+int test_alu_subN(int32_t a, int32_t b) {
+  return __riscv_cv_alu_subN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_subuN
+// CHECK: @llvm.riscv.cv.alu.subuN
+int test_alu_subuN(uint32_t a, uint32_t b) {
+  return __riscv_cv_alu_subuN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_subRN
+// CHECK: @llvm.riscv.cv.alu.subRN
+int test_alu_subRN(int32_t a, int32_t b) {
+  return __riscv_cv_alu_subRN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_subuRN
+// CHECK: @llvm.riscv.cv.alu.subuRN
+int test_alu_subuRN(uint32_t a, uint32_t b) {
+  return __riscv_cv_alu_subuRN(a, b, 0);
+}
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu.c b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
new file mode 100644
index 00000000000000..a3b8c07f22c815
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
@@ -0,0 +1,303 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv32 -target-feature +xcvalu -emit-llvm %s -o - \
+// RUN:     | FileCheck %s
+
+#include <stdint.h>
+
+// CHECK-LABEL: @test_abs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.abs.i32(i32 [[TMP0]], i1 true)
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int test_abs(int a) {
+  return __builtin_abs(a);
+}
+
+// CHECK-LABEL: @test_alu_slet(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.slet(i32 [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_slet(int32_t a, int32_t b) {
+  return __builtin_riscv_cv_alu_slet(a, b);
+}
+
+// CHECK-LABEL: @test_alu_sletu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.sletu(i32 [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_sletu(uint32_t a, uint32_t b) {
+  return __builtin_riscv_cv_alu_sletu(a, b);
+}
+
+// CHECK-LABEL: @test_alu_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.min(i32 [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_min(int32_t a, int32_t b) {
+  return __builtin_riscv_cv_alu_min(a, b);
+}
+
+// CHECK-LABEL: @test_alu_minu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.minu(i32 [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_minu(uint32_t a, uint32_t b) {
+  return __builtin_riscv_cv_alu_minu(a, b);
+}
+
+// CHECK-LABEL: @test_alu_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.max(i32 [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_max(int32_t a, int32_t b) {
+  return __builtin_riscv_cv_alu_max(a, b);
+}
+
+// CHECK-LABEL: @test_alu_maxu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.maxu(i32 [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_maxu(uint32_t a, uint32_t b) {
+  return __builtin_riscv_cv_alu_maxu(a, b);
+}
+
+// CHECK-LABEL: @test_alu_exths(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    store i16 [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.exths(i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_exths(int16_t a) {
+  return __builtin_riscv_cv_alu_exths(a);
+}
+
+// CHECK-LABEL: @test_alu_exthz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    store i16 [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.exthz(i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_exthz(uint16_t a) {
+  return __builtin_riscv_cv_alu_exthz(a);
+}
+
+// CHECK-LABEL: @test_alu_extbs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store i8 [[A:%.*]], ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.extbs(i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_extbs(int8_t a) {
+  return __builtin_riscv_cv_alu_extbs(a);
+}
+
+// CHECK-LABEL: @test_alu_extbz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store i8 [[A:%.*]], ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.extbz(i32 [[TMP1]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_extbz(uint8_t a) {
+  return __builtin_riscv_cv_alu_extbz(a);
+}
+
+// CHECK-LABEL: @test_alu_clip(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.clip(i32 [[TMP0]], i32 15)
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int test_alu_clip(int32_t a) {
+  return __builtin_riscv_cv_alu_clip(a, 15);
+}
+
+// CHECK-LABEL: @test_alu_clipu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.clipu(i32 [[TMP0]], i32 15)
+// CHECK-NEXT:    ret i32 [[TMP1]]
+//
+int test_alu_clipu(uint32_t a) {
+  return __builtin_riscv_cv_alu_clipu(a, 15);
+}
+
+// CHECK-LABEL: @test_alu_addN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.addN(i32 [[TMP0]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_addN(int32_t a, int32_t b) {
+  return __builtin_riscv_cv_alu_addN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_adduN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.adduN(i32 [[TMP0]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_adduN(uint32_t a, uint32_t b) {
+  return __builtin_riscv_cv_alu_adduN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_addRN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.addRN(i32 [[TMP0]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_addRN(int32_t a, int32_t b) {
+  return __builtin_riscv_cv_alu_addRN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_adduRN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.adduRN(i32 [[TMP0]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_adduRN(uint32_t a, uint32_t b) {
+  return __builtin_riscv_cv_alu_adduRN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_subN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.subN(i32 [[TMP0]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_subN(int32_t a, int32_t b) {
+  return __builtin_riscv_cv_alu_subN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_subuN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.subuN(i32 [[TMP0]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_subuN(uint32_t a, uint32_t b) {
+  return __builtin_riscv_cv_alu_subuN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_subRN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.subRN(i32 [[TMP0]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_subRN(int32_t a, int32_t b) {
+  return __builtin_riscv_cv_alu_subRN(a, b, 0);
+}
+
+// CHECK-LABEL: @test_alu_subuRN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.subuRN(i32 [[TMP0]], i32 [[TMP1]], i32 0)
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
+int test_alu_subuRN(uint32_t a, uint32_t b) {
+  return __builtin_riscv_cv_alu_subuRN(a, b, 0);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td b/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
index 38263f375c4692..02105900d037a3 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
@@ -59,16 +59,26 @@ let TargetPrefix = "riscv" in {
                             [IntrNoMem, IntrWillReturn, IntrSpeculatable,
                             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
 
+  def int_riscv_cv_alu_slet  : ScalarCoreVAluGprGprIntrinsic;
+  def int_riscv_cv_alu_sletu : ScalarCoreVAluGprGprIntrinsic;
+  def int_riscv_cv_alu_min   : ScalarCoreVAluGprGprIntrinsic;
+  def int_riscv_cv_alu_minu  : ScalarCoreVAluGprGprIntrinsic;
+  def int_riscv_cv_alu_max   : ScalarCoreVAluGprGprIntrinsic;
+  def int_riscv_cv_alu_maxu  : ScalarCoreVAluGprGprIntrinsic;
+  def int_riscv_cv_alu_exths : ScalarCoreVAluGprIntrinsic;
+  def int_riscv_cv_alu_exthz : ScalarCoreVAluGprIntrinsic;
+  def int_riscv_cv_alu_extbs : ScalarCoreVAluGprIntrinsic;
+  def int_riscv_cv_alu_extbz : ScalarCoreVAluGprIntrinsic;
   def int_riscv_cv_alu_clip   : ScalarCoreVAluGprGprIntrinsic;
   def int_riscv_cv_alu_clipu  : ScalarCoreVAluGprGprIntrinsic;
-  def int_riscv_cv_alu_addn   : ScalarCoreVAluGprGprGprIntrinsic;
-  def int_riscv_cv_alu_addun  : ScalarCoreVAluGprGprGprIntrinsic;
-  def int_riscv_cv_alu_addrn  : ScalarCoreVAluGprGprGprIntrinsic;
-  def int_riscv_cv_alu_addurn : ScalarCoreVAluGprGprGprIntrinsic;
-  def int_riscv_cv_alu_subn   : ScalarCoreVAluGprGprGprIntrinsic;
-  def int_riscv_cv_alu_subun  : ScalarCoreVAluGprGprGprIntrinsic;
-  def int_riscv_cv_alu_subrn  : ScalarCoreVAluGprGprGprIntrinsic;
-  def int_riscv_cv_alu_suburn : ScalarCoreVAluGprGprGprIntrinsic;
+  def int_riscv_cv_alu_addN   : ScalarCoreVAluGprGprGprIntrinsic;
+  def int_riscv_cv_alu_adduN  : ScalarCoreVAluGprGprGprIntrinsic;
+  def int_riscv_cv_alu_addRN  : ScalarCoreVAluGprGprGprIntrinsic;
+  def int_riscv_cv_alu_adduRN : ScalarCoreVAluGprGprGprIntrinsic;
+  def int_riscv_cv_alu_subN   : ScalarCoreVAluGprGprGprIntrinsic;
+  def int_riscv_cv_alu_subuN  : ScalarCoreVAluGprGprGprIntrinsic;
+  def int_riscv_cv_alu_subRN  : ScalarCoreVAluGprGprGprIntrinsic;
+  def int_riscv_cv_alu_subuRN : ScalarCoreVAluGprGprGprIntrinsic;
 
   def int_riscv_cv_mac_mac : ScalarCoreVMacGprGprGprIntrinsic;
   def int_riscv_cv_mac_msu : ScalarCoreVMacGprGprGprIntrinsic;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index b586b10192fff4..fedf51cddb0533 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -793,16 +793,27 @@ let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in {
   def : Pat<(sext_inreg (XLenVT GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>;
   def : Pat<(and (XLenVT GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>;
 
+  def : PatCoreVAluGprGpr<"slet", "SLET">;
+  def : PatCoreVAluGprGpr<"sletu", "SLETU">;
+  def : PatCoreVAluGprGpr<"min", "MIN">;
+  def : PatCoreVAluGprGpr<"minu", "MINU">;
+  def : PatCoreVAluGprGpr<"max", "MAX">;
+  def : PatCoreVAluGprGpr<"maxu", "MAXU">;
+  def : PatCoreVAluGpr<"exths", "EXTHS">;
+  def : PatCoreVAluGpr<"exthz", "EXTHZ">;
+  def : PatCoreVAluGpr<"extbs", "EXTBS">;
+  def : PatCoreVAluGpr<"extbz", "EXTBZ">;
+
   defm CLIP   : PatCoreVAluGprImm<int_riscv_cv_alu_clip>;
   defm CLIPU  : PatCoreVAluGprImm<int_riscv_cv_alu_clipu>;
-  defm ADDN   : PatCoreVAluGprGprImm<int_riscv_cv_alu_addn>;
-  defm ADDUN  : PatCoreVAluGprGprImm<int_riscv_cv_alu_addun>;
-  defm ADDRN  : PatCoreVAluGprGprImm<int_riscv_cv_alu_addrn>;
-  defm ADDURN : PatCoreVAluGprGprImm<int_riscv_cv_alu_addurn>;
-  defm SUBN   : PatCoreVAluGprGprImm<int_riscv_cv_alu_subn>;
-  defm SUBUN  : PatCoreVAluGprGprImm<int_riscv_cv_alu_subun>;
-  defm SUBRN  : PatCoreVAluGprGprImm<int_riscv_cv_alu_subrn>;
-  defm SUBURN : PatCoreVAluGprGprImm<int_riscv_cv_alu_suburn>;
+  defm ADDN   : PatCoreVAluGprGprImm<int_riscv_cv_alu_addN>;
+  defm ADDUN  : PatCoreVAluGprGprImm<int_riscv_cv_alu_adduN>;
+  defm ADDRN  : PatCoreVAluGprGprImm<int_riscv_cv_alu_addRN>;
+  defm ADDURN : PatCoreVAluGprGprImm<int_riscv_cv_alu_adduRN>;
+  defm SUBN   : PatCoreVAluGprGprImm<int_riscv_cv_alu_subN>;
+  defm SUBUN  : PatCoreVAluGprGprImm<int_riscv_cv_alu_subuN>;
+  defm SUBRN  : PatCoreVAluGprGprImm<int_riscv_cv_alu_subRN>;
+  defm SUBURN : PatCoreVAluGprGprImm<int_riscv_cv_alu_subuRN>;
 } // Predicates = [HasVendorXCValu, IsRV32]
 
 //===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/xcvalu.ll b/llvm/test/CodeGen/RISCV/xcvalu.ll
index 1ddfa102aca717..0a81fd8dd717d0 100644
--- a/llvm/test/CodeGen/RISCV/xcvalu.ll
+++ b/llvm/test/CodeGen/RISCV/xcvalu.ll
@@ -91,6 +91,116 @@ define i32 @exthz(i16 %a) {
   ret i32 %1
 }
 
+declare i32 @llvm.riscv.cv.alu.slet(i32, i32)
+
+define i32 @test.cv.alu.slet(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.slet:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.slet a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.slet(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.sletu(i32, i32)
+
+define i32 @test.cv.alu.sletu(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.sletu:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.sletu a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.sletu(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.min(i32, i32)
+
+define i32 @test.cv.alu.min(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.min:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.min a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.min(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.minu(i32, i32)
+
+define i32 @test.cv.alu.minu(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.minu:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.minu a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.minu(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.max(i32, i32)
+
+define i32 @test.cv.alu.max(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.max:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.max a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.max(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.maxu(i32, i32)
+
+define i32 @test.cv.alu.maxu(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.maxu:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.maxu a0, a0, a1
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.maxu(i32 %a, i32 %b)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.exths(i32)
+
+define i32 @test.cv.alu.exths(i32 %a) {
+; CHECK-LABEL: test.cv.alu.exths:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.exths a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.exths(i32 %a)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.exthz(i32)
+
+define i32 @test.cv.alu.exthz(i32 %a) {
+; CHECK-LABEL: test.cv.alu.exthz:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.exthz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.exthz(i32 %a)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.extbs(i32)
+
+define i32 @test.cv.alu.extbs(i32 %a) {
+; CHECK-LABEL: test.cv.alu.extbs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extbs a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.extbs(i32 %a)
+  ret i32 %1
+}
+
+declare i32 @llvm.riscv.cv.alu.extbz(i32)
+
+define i32 @test.cv.alu.extbz(i32 %a) {
+; CHECK-LABEL: test.cv.alu.extbz:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    cv.extbz a0, a0
+; CHECK-NEXT:    ret
+  %1 = call i32 @llvm.riscv.cv.alu.extbz(i32 %a)
+  ret i32 %1
+}
+
 declare i32 @llvm.riscv.cv.alu.clip(i32, i32)
 
 define i32 @test.cv.alu.clip.case.a(i32 %a) {
@@ -133,170 +243,170 @@ define i32 @test.cv.alu.clipu.case.b(i32 %a) {
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.addn(i32, i32, i32)
+declare i32 @llvm.riscv.cv.alu.addN(i32, i32, i32)
 
-define i32 @test.cv.alu.addn.case.a(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.addn.case.a:
+define i32 @test.cv.alu.addN.case.a(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.addN.case.a:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cv.addn a0, a0, a1, 15
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.addn(i32 %a, i32 %b, i32 15)
+  %1 = call i32 @llvm.riscv.cv.alu.addN(i32 %a, i32 %b, i32 15)
   ret i32 %1
 }
 
-define i32 @test.cv.alu.addn.case.b(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.addn.case.b:
+define i32 @test.cv.alu.addN.case.b(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.addN.case.b:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    cv.addnr a0, a1, a2
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.addn(i32 %a, i32 %b, i32 32)
+  %1 = call i32 @llvm.riscv.cv.alu.addN(i32 %a, i32 %b, i32 32)
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.addun(i32, i32, i32)
+declare i32 @llvm.riscv.cv.alu.adduN(i32, i32, i32)
 
-define i32 @test.cv.alu.addun.case.a(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.addun.case.a:
+define i32 @test.cv.alu.adduN.case.a(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.adduN.case.a:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cv.addun a0, a0, a1, 15
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.addun(i32 %a, i32 %b, i32 15)
+  %1 = call i32 @llvm.riscv.cv.alu.adduN(i32 %a, i32 %b, i32 15)
   ret i32 %1
 }
 
-define i32 @test.cv.alu.addun.case.b(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.addun.case.b:
+define i32 @test.cv.alu.adduN.case.b(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.adduN.case.b:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    cv.addunr a0, a1, a2
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.addun(i32 %a, i32 %b, i32 32)
+  %1 = call i32 @llvm.riscv.cv.alu.adduN(i32 %a, i32 %b, i32 32)
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.addrn(i32, i32, i32)
+declare i32 @llvm.riscv.cv.alu.addRN(i32, i32, i32)
 
-define i32 @test.cv.alu.addrn.case.a(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.addrn.case.a:
+define i32 @test.cv.alu.addRN.case.a(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.addRN.case.a:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cv.addrn a0, a0, a1, 15
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.addrn(i32 %a, i32 %b, i32 15)
+  %1 = call i32 @llvm.riscv.cv.alu.addRN(i32 %a, i32 %b, i32 15)
   ret i32 %1
 }
 
-define i32 @test.cv.alu.addrn.case.b(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.addrn.case.b:
+define i32 @test.cv.alu.addRN.case.b(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.addRN.case.b:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    cv.addrnr a0, a1, a2
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.addrn(i32 %a, i32 %b, i32 32)
+  %1 = call i32 @llvm.riscv.cv.alu.addRN(i32 %a, i32 %b, i32 32)
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.addurn(i32, i32, i32)
+declare i32 @llvm.riscv.cv.alu.adduRN(i32, i32, i32)
 
-define i32 @test.cv.alu.addurn.case.a(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.addurn.case.a:
+define i32 @test.cv.alu.adduRN.case.a(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.adduRN.case.a:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cv.addurn a0, a0, a1, 15
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.addurn(i32 %a, i32 %b, i32 15)
+  %1 = call i32 @llvm.riscv.cv.alu.adduRN(i32 %a, i32 %b, i32 15)
   ret i32 %1
 }
 
-define i32 @test.cv.alu.addurn.case.b(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.addurn.case.b:
+define i32 @test.cv.alu.adduRN.case.b(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.adduRN.case.b:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    cv.addurnr a0, a1, a2
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.addurn(i32 %a, i32 %b, i32 32)
+  %1 = call i32 @llvm.riscv.cv.alu.adduRN(i32 %a, i32 %b, i32 32)
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.subn(i32, i32, i32)
+declare i32 @llvm.riscv.cv.alu.subN(i32, i32, i32)
 
-define i32 @test.cv.alu.subn.case.a(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.subn.case.a:
+define i32 @test.cv.alu.subN.case.a(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.subN.case.a:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cv.subn a0, a0, a1, 15
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.subn(i32 %a, i32 %b, i32 15)
+  %1 = call i32 @llvm.riscv.cv.alu.subN(i32 %a, i32 %b, i32 15)
   ret i32 %1
 }
 
-define i32 @test.cv.alu.subn.case.b(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.subn.case.b:
+define i32 @test.cv.alu.subN.case.b(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.subN.case.b:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    cv.subnr a0, a1, a2
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.subn(i32 %a, i32 %b, i32 32)
+  %1 = call i32 @llvm.riscv.cv.alu.subN(i32 %a, i32 %b, i32 32)
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.subun(i32, i32, i32)
+declare i32 @llvm.riscv.cv.alu.subuN(i32, i32, i32)
 
-define i32 @test.cv.alu.subun.case.a(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.subun.case.a:
+define i32 @test.cv.alu.subuN.case.a(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.subuN.case.a:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cv.subun a0, a0, a1, 15
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.subun(i32 %a, i32 %b, i32 15)
+  %1 = call i32 @llvm.riscv.cv.alu.subuN(i32 %a, i32 %b, i32 15)
   ret i32 %1
 }
 
-define i32 @test.cv.alu.subun.case.b(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.subun.case.b:
+define i32 @test.cv.alu.subuN.case.b(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.subuN.case.b:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    cv.subunr a0, a1, a2
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.subun(i32 %a, i32 %b, i32 32)
+  %1 = call i32 @llvm.riscv.cv.alu.subuN(i32 %a, i32 %b, i32 32)
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.subrn(i32, i32, i32)
+declare i32 @llvm.riscv.cv.alu.subRN(i32, i32, i32)
 
-define i32 @test.cv.alu.subrn.case.a(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.subrn.case.a:
+define i32 @test.cv.alu.subRN.case.a(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.subRN.case.a:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cv.subrn a0, a0, a1, 15
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.subrn(i32 %a, i32 %b, i32 15)
+  %1 = call i32 @llvm.riscv.cv.alu.subRN(i32 %a, i32 %b, i32 15)
   ret i32 %1
 }
 
-define i32 @test.cv.alu.subrn.case.b(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.subrn.case.b:
+define i32 @test.cv.alu.subRN.case.b(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.subRN.case.b:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    cv.subrnr a0, a1, a2
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.subrn(i32 %a, i32 %b, i32 32)
+  %1 = call i32 @llvm.riscv.cv.alu.subRN(i32 %a, i32 %b, i32 32)
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.suburn(i32, i32, i32)
+declare i32 @llvm.riscv.cv.alu.subuRN(i32, i32, i32)
 
-define i32 @test.cv.alu.suburn.case.a(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.suburn.case.a:
+define i32 @test.cv.alu.subuRN.case.a(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.subuRN.case.a:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    cv.suburn a0, a0, a1, 15
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.suburn(i32 %a, i32 %b, i32 15)
+  %1 = call i32 @llvm.riscv.cv.alu.subuRN(i32 %a, i32 %b, i32 15)
   ret i32 %1
 }
 
-define i32 @test.cv.alu.suburn.case.b(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.suburn.case.b:
+define i32 @test.cv.alu.subuRN.case.b(i32 %a, i32 %b) {
+; CHECK-LABEL: test.cv.alu.subuRN.case.b:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    li a2, 32
 ; CHECK-NEXT:    cv.suburnr a0, a1, a2
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.suburn(i32 %a, i32 %b, i32 32)
+  %1 = call i32 @llvm.riscv.cv.alu.subuRN(i32 %a, i32 %b, i32 32)
   ret i32 %1
 }

>From ec017e38e2828fbb217f9df451fca27554857f99 Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Thu, 8 Aug 2024 14:00:45 +1000
Subject: [PATCH 2/8] Declare builtins to take MachineType

---
 .../include/clang/Basic/BuiltinsRISCVXCV.def  | 24 ++++++++---------
 clang/lib/CodeGen/CGBuiltin.cpp               | 26 +------------------
 clang/test/CodeGen/RISCV/riscv-xcvalu.c       | 24 ++++++++---------
 3 files changed, 25 insertions(+), 49 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsRISCVXCV.def b/clang/include/clang/Basic/BuiltinsRISCVXCV.def
index 29c59cdf005d03..e7f2bd613c5a9c 100644
--- a/clang/include/clang/Basic/BuiltinsRISCVXCV.def
+++ b/clang/include/clang/Basic/BuiltinsRISCVXCV.def
@@ -21,21 +21,21 @@ TARGET_BUILTIN(alu_min, "ZiZiZi", "nc", "xcvalu")
 TARGET_BUILTIN(alu_minu, "UZiUZiUZi", "nc", "xcvalu")
 TARGET_BUILTIN(alu_max, "ZiZiZi", "nc", "xcvalu")
 TARGET_BUILTIN(alu_maxu, "UZiUZiUZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_exths, "Zis", "nc", "xcvalu")
-TARGET_BUILTIN(alu_exthz, "UZiUs", "nc", "xcvalu")
-TARGET_BUILTIN(alu_extbs, "Zic", "nc", "xcvalu")
-TARGET_BUILTIN(alu_extbz, "UZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_exths, "Zii", "nc", "xcvalu")
+TARGET_BUILTIN(alu_exthz, "UZiUi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_extbs, "Zii", "nc", "xcvalu")
+TARGET_BUILTIN(alu_extbz, "UZiUi", "nc", "xcvalu")
 
 TARGET_BUILTIN(alu_clip, "ZiZiUZi", "nc", "xcvalu")
 TARGET_BUILTIN(alu_clipu, "UZiUZiUZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_addN, "ZiZiUZiUc", "nc", "xcvalu")
-TARGET_BUILTIN(alu_adduN, "UZiUZiUZiUc", "nc", "xcvalu")
-TARGET_BUILTIN(alu_addRN, "ZiZiZiUc", "nc", "xcvalu")
-TARGET_BUILTIN(alu_adduRN, "UZiUZiUZiUc", "nc", "xcvalu")
-TARGET_BUILTIN(alu_subN, "ZiZiUZiUc", "nc", "xcvalu")
-TARGET_BUILTIN(alu_subuN, "UZiUZiUZiUc", "nc", "xcvalu")
-TARGET_BUILTIN(alu_subRN, "ZiZiZiUc", "nc", "xcvalu")
-TARGET_BUILTIN(alu_subuRN, "UZiUZiUZiUc", "nc", "xcvalu")
+TARGET_BUILTIN(alu_addN, "ZiZiUZiUi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_adduN, "UZiUZiUZiUi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_addRN, "ZiZiZiUi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_adduRN, "UZiUZiUZiUi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_subN, "ZiZiUZiUi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_subuN, "UZiUZiUZiUi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_subRN, "ZiZiZiUi", "nc", "xcvalu")
+TARGET_BUILTIN(alu_subuRN, "UZiUZiUZiUi", "nc", "xcvalu")
 
 #undef BUILTIN
 #undef TARGET_BUILTIN
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index b0dfc12a0f559b..74d54aea84eda7 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -22114,30 +22114,6 @@ Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
   return nullptr;
 }
 
-static Value *EmitXCVIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID,
-                               unsigned IntrinsicID,
-                               MutableArrayRef<Value *> Ops,
-                               const CallExpr *E) {
-  llvm::Type *MachineType =
-      llvm::IntegerType::getInt32Ty(CGF.CGM.getLLVMContext());
-  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
-    if (Ops[i]->getType() != MachineType) {
-      QualType type = E->getArg(i)->getType();
-      assert((type->isSignedIntegerType() || type->isUnsignedIntegerType() ||
-              type->isPointerType()) &&
-             "Argument of Core-V builtin must have signed or unsigned integer "
-             "or Pointer type");
-      if (type->isSignedIntegerType()) {
-        Ops[i] = CGF.Builder.CreateSExt(Ops[i], MachineType);
-      } else if ((type->isUnsignedIntegerType())) {
-        Ops[i] = CGF.Builder.CreateZExt(Ops[i], MachineType);
-      }
-    }
-  }
-  llvm::Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
-  return CGF.Builder.CreateCall(F, Ops);
-}
-
 Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
                                              const CallExpr *E,
                                              ReturnValueSlot ReturnValue) {
@@ -22368,7 +22344,7 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
 #define BUILTIN(NAME, TYPE, ATTRS)                                             \
   case RISCVXCV::BI__builtin_riscv_cv_##NAME:                                  \
     ID = Intrinsic::riscv_cv_##NAME;                                           \
-    return EmitXCVIntrinsic(*this, BuiltinID, ID, Ops, E);
+    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
 #include "clang/Basic/BuiltinsRISCVXCV.def"
 
   // Vector builtins are handled from here.
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu.c b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
index a3b8c07f22c815..c0554d7aa662f9 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
@@ -111,9 +111,9 @@ int test_alu_maxu(uint32_t a, uint32_t b) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK-NEXT:    store i16 [[A:%.*]], ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
-// CHECK-NEXT:    [[TMP1:%.*]] = sext i16 [[TMP0]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.exths(i32 [[TMP1]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.exths(i32 [[CONV]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int test_alu_exths(int16_t a) {
   return __builtin_riscv_cv_alu_exths(a);
@@ -124,9 +124,9 @@ int test_alu_exths(int16_t a) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK-NEXT:    store i16 [[A:%.*]], ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i16 [[TMP0]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.exthz(i32 [[TMP1]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.exthz(i32 [[CONV]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int test_alu_exthz(uint16_t a) {
   return __builtin_riscv_cv_alu_exthz(a);
@@ -137,9 +137,9 @@ int test_alu_exthz(uint16_t a) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8, align 1
 // CHECK-NEXT:    store i8 [[A:%.*]], ptr [[A_ADDR]], align 1
 // CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = sext i8 [[TMP0]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.extbs(i32 [[TMP1]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.extbs(i32 [[CONV]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int test_alu_extbs(int8_t a) {
   return __builtin_riscv_cv_alu_extbs(a);
@@ -150,9 +150,9 @@ int test_alu_extbs(int8_t a) {
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8, align 1
 // CHECK-NEXT:    store i8 [[A:%.*]], ptr [[A_ADDR]], align 1
 // CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
-// CHECK-NEXT:    [[TMP1:%.*]] = zext i8 [[TMP0]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.extbz(i32 [[TMP1]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
+// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.extbz(i32 [[CONV]])
+// CHECK-NEXT:    ret i32 [[TMP1]]
 //
 int test_alu_extbz(uint8_t a) {
   return __builtin_riscv_cv_alu_extbz(a);

>From b8262f981762058240b664a19f384655d79c95e9 Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Thu, 8 Aug 2024 14:01:56 +1000
Subject: [PATCH 3/8] fix header location

---
 clang/include/module.modulemap | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap
index 7b177db4505e5c..6a71d5009e26fe 100644
--- a/clang/include/module.modulemap
+++ b/clang/include/module.modulemap
@@ -55,7 +55,7 @@ module Clang_Basic {
   textual header "clang/Basic/BuiltinsNEON.def"
   textual header "clang/Basic/BuiltinsNVPTX.def"
   textual header "clang/Basic/BuiltinsPPC.def"
-  textual header "Basic/BuiltinsRISCVXCV.def"
+  textual header "clang/Basic/BuiltinsRISCVXCV.def"
   textual header "clang/Basic/BuiltinsRISCVVector.def"
   textual header "clang/Basic/BuiltinsSME.def"
   textual header "clang/Basic/BuiltinsSVE.def"

>From 4115204b72ef4028a2250e733a4eae6b8cd3bfd8 Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Fri, 30 Aug 2024 13:29:20 +1000
Subject: [PATCH 4/8] remove unnecessary cv.min/max, move XCV builtins into
 RISCV namespace

---
 clang/include/clang/Basic/BuiltinsRISCV.td    |  5 ++
 .../include/clang/Basic/BuiltinsRISCVXCV.def  | 41 ------------
 clang/include/clang/Basic/BuiltinsRISCVXCV.td | 43 +++++++++++++
 clang/include/clang/Basic/TargetBuiltins.h    | 10 ---
 clang/include/module.modulemap                |  1 -
 clang/lib/Basic/Targets/RISCV.cpp             |  9 +--
 clang/lib/CodeGen/CGBuiltin.cpp               | 64 ++++++++++++++++---
 clang/lib/Headers/riscv_corev_alu.h           |  4 +-
 clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c |  4 +-
 clang/test/CodeGen/RISCV/riscv-xcvalu.c       | 30 ---------
 10 files changed, 109 insertions(+), 102 deletions(-)
 delete mode 100644 clang/include/clang/Basic/BuiltinsRISCVXCV.def
 create mode 100644 clang/include/clang/Basic/BuiltinsRISCVXCV.td

diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 4cc89a8a9d8af2..3263603a8a1cf6 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -146,3 +146,8 @@ let Features = "zihintntl", Attributes = [CustomTypeChecking] in {
 def ntl_load : RISCVBuiltin<"void(...)">;
 def ntl_store : RISCVBuiltin<"void(...)">;
 } // Features = "zihintntl", Attributes = [CustomTypeChecking]
+
+//===----------------------------------------------------------------------===//
+// XCV extensions.
+//===----------------------------------------------------------------------===//
+include "clang/Basic/BuiltinsRISCVXCV.td"
diff --git a/clang/include/clang/Basic/BuiltinsRISCVXCV.def b/clang/include/clang/Basic/BuiltinsRISCVXCV.def
deleted file mode 100644
index e7f2bd613c5a9c..00000000000000
--- a/clang/include/clang/Basic/BuiltinsRISCVXCV.def
+++ /dev/null
@@ -1,41 +0,0 @@
-//==- BuiltinsRISCVXCV.def - RISC-V CORE-V Builtin database ----*- C++ -*-==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines the CORE-V-specific builtin function database.  Users of
-// this file must define the BUILTIN macro to make use of this information.
-//
-//===----------------------------------------------------------------------===//
-
-#if defined(BUILTIN) && !defined(TARGET_BUILTIN)
-#   define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE) BUILTIN(ID, TYPE, ATTRS)
-#endif
-
-TARGET_BUILTIN(alu_slet, "ZiZiZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_sletu, "ZiUZiUZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_min, "ZiZiZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_minu, "UZiUZiUZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_max, "ZiZiZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_maxu, "UZiUZiUZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_exths, "Zii", "nc", "xcvalu")
-TARGET_BUILTIN(alu_exthz, "UZiUi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_extbs, "Zii", "nc", "xcvalu")
-TARGET_BUILTIN(alu_extbz, "UZiUi", "nc", "xcvalu")
-
-TARGET_BUILTIN(alu_clip, "ZiZiUZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_clipu, "UZiUZiUZi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_addN, "ZiZiUZiUi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_adduN, "UZiUZiUZiUi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_addRN, "ZiZiZiUi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_adduRN, "UZiUZiUZiUi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_subN, "ZiZiUZiUi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_subuN, "UZiUZiUZiUi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_subRN, "ZiZiZiUi", "nc", "xcvalu")
-TARGET_BUILTIN(alu_subuRN, "UZiUZiUZiUi", "nc", "xcvalu")
-
-#undef BUILTIN
-#undef TARGET_BUILTIN
diff --git a/clang/include/clang/Basic/BuiltinsRISCVXCV.td b/clang/include/clang/Basic/BuiltinsRISCVXCV.td
new file mode 100644
index 00000000000000..7708082059b75a
--- /dev/null
+++ b/clang/include/clang/Basic/BuiltinsRISCVXCV.td
@@ -0,0 +1,43 @@
+//==- BuiltinsRISCVXCV.td - RISC-V CORE-V Builtin database    ----*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the CORE-V-specific builtin function database.  It is a
+// TableGen file included from BuiltinsRISCV.td, not a BUILTIN-macro .def file.
+//
+//===----------------------------------------------------------------------===//
+
+class RISCXCVBuiltin<string prototype, string features = ""> : TargetBuiltin {
+  let Spellings = ["__builtin_riscv_cv_" # NAME];
+  let Prototype = prototype;
+  let Features = features;
+}
+
+let Attributes = [NoThrow, Const] in {
+//===----------------------------------------------------------------------===//
+// XCValu extension.
+//===----------------------------------------------------------------------===//
+def alu_slet  : RISCXCVBuiltin<"int(int, int)", "xcvalu">;
+def alu_sletu : RISCXCVBuiltin<"int(unsigned int, unsigned int)", "xcvalu">;
+def alu_minu  : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int)", "xcvalu">;
+def alu_maxu  : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int)", "xcvalu">;
+def alu_exths : RISCXCVBuiltin<"int(int)", "xcvalu">;
+def alu_exthz : RISCXCVBuiltin<"unsigned int(unsigned int)", "xcvalu">;
+def alu_extbs : RISCXCVBuiltin<"int(int)", "xcvalu">;
+def alu_extbz : RISCXCVBuiltin<"unsigned int(unsigned int)", "xcvalu">;
+
+def alu_clip   : RISCXCVBuiltin<"int(int, int)", "xcvalu">;
+def alu_clipu  : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int)", "xcvalu">;
+def alu_addN   : RISCXCVBuiltin<"int(int, int, unsigned int)", "xcvalu">;
+def alu_adduN  : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", "xcvalu">;
+def alu_addRN  : RISCXCVBuiltin<"int(int, int, unsigned int)", "xcvalu">;
+def alu_adduRN : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", "xcvalu">;
+def alu_subN   : RISCXCVBuiltin<"int(int, int, unsigned int)", "xcvalu">;
+def alu_subuN  : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", "xcvalu">;
+def alu_subRN  : RISCXCVBuiltin<"int(int, int, unsigned int)", "xcvalu">;
+def alu_subuRN : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int, unsigned int)", "xcvalu">;
+} // Attributes = [NoThrow, Const]
diff --git a/clang/include/clang/Basic/TargetBuiltins.h b/clang/include/clang/Basic/TargetBuiltins.h
index f25fa22a95a1ed..d0f41b17c154f3 100644
--- a/clang/include/clang/Basic/TargetBuiltins.h
+++ b/clang/include/clang/Basic/TargetBuiltins.h
@@ -152,22 +152,12 @@ namespace clang {
   };
   }
 
-  namespace RISCVXCV {
-  enum {
-    LastRVVBuiltin = RISCVVector::FirstTSBuiltin - 1,
-#define BUILTIN(ID, TYPE, ATTRS) BI__builtin_riscv_cv_##ID,
-#include "clang/Basic/BuiltinsRISCVXCV.def"
-    FirstTSBuiltin,
-  };
-  } // namespace RISCVXCV
-
   /// RISCV builtins
   namespace RISCV {
   enum {
     LastTIBuiltin = clang::Builtin::FirstTSBuiltin - 1,
     FirstRVVBuiltin = clang::Builtin::FirstTSBuiltin,
     LastRVVBuiltin = RISCVVector::FirstTSBuiltin - 1,
-    LastXCVBuiltin = RISCVXCV::FirstTSBuiltin - 1,
 #define BUILTIN(ID, TYPE, ATTRS) BI##ID,
 #include "clang/Basic/BuiltinsRISCV.inc"
     LastTSBuiltin
diff --git a/clang/include/module.modulemap b/clang/include/module.modulemap
index 6a71d5009e26fe..b6ab99bb85d8a7 100644
--- a/clang/include/module.modulemap
+++ b/clang/include/module.modulemap
@@ -55,7 +55,6 @@ module Clang_Basic {
   textual header "clang/Basic/BuiltinsNEON.def"
   textual header "clang/Basic/BuiltinsNVPTX.def"
   textual header "clang/Basic/BuiltinsPPC.def"
-  textual header "clang/Basic/BuiltinsRISCVXCV.def"
   textual header "clang/Basic/BuiltinsRISCVVector.def"
   textual header "clang/Basic/BuiltinsSME.def"
   textual header "clang/Basic/BuiltinsSVE.def"
diff --git a/clang/lib/Basic/Targets/RISCV.cpp b/clang/lib/Basic/Targets/RISCV.cpp
index 03e4b07e12305f..b6ea4440507ea1 100644
--- a/clang/lib/Basic/Targets/RISCV.cpp
+++ b/clang/lib/Basic/Targets/RISCV.cpp
@@ -233,14 +233,7 @@ static constexpr Builtin::Info BuiltinInfo[] = {
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #include "clang/Basic/BuiltinsRISCVVector.def"
 #define BUILTIN(ID, TYPE, ATTRS)                                               \
-  {"__builtin_riscv_cv_" #ID, TYPE,         ATTRS, nullptr,                    \
-   HeaderDesc::NO_HEADER,     ALL_LANGUAGES},
-#define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
-  {"__builtin_riscv_cv_" #ID, TYPE,         ATTRS, FEATURE,                    \
-   HeaderDesc::NO_HEADER,     ALL_LANGUAGES},
-#include "clang/Basic/BuiltinsRISCVXCV.def"
-#define BUILTIN(ID, TYPE, ATTRS)                                               \
-  \ {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
+  {#ID, TYPE, ATTRS, nullptr, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #define TARGET_BUILTIN(ID, TYPE, ATTRS, FEATURE)                               \
   {#ID, TYPE, ATTRS, FEATURE, HeaderDesc::NO_HEADER, ALL_LANGUAGES},
 #include "clang/Basic/BuiltinsRISCV.inc"
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 74d54aea84eda7..9c96adb85df61e 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -22339,15 +22339,63 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
 
     return Store;
   }
+  // XCValu
+  case RISCV::BI__builtin_riscv_cv_alu_addN:
+    ID = Intrinsic::riscv_cv_alu_addN;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_addRN:
+    ID = Intrinsic::riscv_cv_alu_addRN;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_adduN:
+    ID = Intrinsic::riscv_cv_alu_adduN;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_adduRN:
+    ID = Intrinsic::riscv_cv_alu_adduRN;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_clip:
+    ID = Intrinsic::riscv_cv_alu_clip;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_clipu:
+    ID = Intrinsic::riscv_cv_alu_clipu;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_extbs:
+    ID = Intrinsic::riscv_cv_alu_extbs;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_extbz:
+    ID = Intrinsic::riscv_cv_alu_extbz;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_exths:
+    ID = Intrinsic::riscv_cv_alu_exths;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_exthz:
+    ID = Intrinsic::riscv_cv_alu_exthz;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_maxu:
+    ID = Intrinsic::riscv_cv_alu_maxu;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_minu:
+    ID = Intrinsic::riscv_cv_alu_minu;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_slet:
+    ID = Intrinsic::riscv_cv_alu_slet;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_sletu:
+    ID = Intrinsic::riscv_cv_alu_sletu;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_subN:
+    ID = Intrinsic::riscv_cv_alu_subN;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_subRN:
+    ID = Intrinsic::riscv_cv_alu_subRN;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_subuN:
+    ID = Intrinsic::riscv_cv_alu_subuN;
+    break;
+  case RISCV::BI__builtin_riscv_cv_alu_subuRN:
+    ID = Intrinsic::riscv_cv_alu_subuRN;
+    break;
 
-// Core-V
-#define BUILTIN(NAME, TYPE, ATTRS)                                             \
-  case RISCVXCV::BI__builtin_riscv_cv_##NAME:                                  \
-    ID = Intrinsic::riscv_cv_##NAME;                                           \
-    return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
-#include "clang/Basic/BuiltinsRISCVXCV.def"
-
-  // Vector builtins are handled from here.
+    // Vector builtins are handled from here.
 #include "clang/Basic/riscv_vector_builtin_cg.inc"
   // SiFive Vector builtins are handled from here.
 #include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
diff --git a/clang/lib/Headers/riscv_corev_alu.h b/clang/lib/Headers/riscv_corev_alu.h
index d6230022579b8e..205197fbe4c540 100644
--- a/clang/lib/Headers/riscv_corev_alu.h
+++ b/clang/lib/Headers/riscv_corev_alu.h
@@ -34,7 +34,7 @@ __riscv_cv_alu_sletu(unsigned long a, unsigned long b) {
 }
 
 static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_min(long a, long b) {
-  return __builtin_riscv_cv_alu_min(a, b);
+  return min(a, b);
 }
 
 static __inline__ unsigned long __DEFAULT_FN_ATTRS
@@ -43,7 +43,7 @@ __riscv_cv_alu_minu(unsigned long a, unsigned long b) {
 }
 
 static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_max(long a, long b) {
-  return __builtin_riscv_cv_alu_max(a, b);
+  return max(a, b);
 }
 
 static __inline__ unsigned long __DEFAULT_FN_ATTRS
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
index d5c16fbe3d7663..eb10c817a1f2b4 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
@@ -18,7 +18,7 @@ int test_alu_sletu(uint32_t a, uint32_t b) {
 }
 
 // CHECK-LABEL: @test_alu_min
-// CHECK: @llvm.riscv.cv.alu.min
+// CHECK: @min
 int test_alu_min(int32_t a, int32_t b) {
   return __riscv_cv_alu_min(a, b);
 }
@@ -30,7 +30,7 @@ int test_alu_minu(uint32_t a, uint32_t b) {
 }
 
 // CHECK-LABEL: @test_alu_max
-// CHECK: @llvm.riscv.cv.alu.max
+// CHECK: @max
 int test_alu_max(int32_t a, int32_t b) {
   return __riscv_cv_alu_max(a, b);
 }
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu.c b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
index c0554d7aa662f9..2445c0bd07fd5b 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
@@ -46,21 +46,6 @@ int test_alu_sletu(uint32_t a, uint32_t b) {
   return __builtin_riscv_cv_alu_sletu(a, b);
 }
 
-// CHECK-LABEL: @test_alu_min(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.min(i32 [[TMP0]], i32 [[TMP1]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
-//
-int test_alu_min(int32_t a, int32_t b) {
-  return __builtin_riscv_cv_alu_min(a, b);
-}
-
 // CHECK-LABEL: @test_alu_minu(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
@@ -76,21 +61,6 @@ int test_alu_minu(uint32_t a, uint32_t b) {
   return __builtin_riscv_cv_alu_minu(a, b);
 }
 
-// CHECK-LABEL: @test_alu_max(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.max(i32 [[TMP0]], i32 [[TMP1]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
-//
-int test_alu_max(int32_t a, int32_t b) {
-  return __builtin_riscv_cv_alu_max(a, b);
-}
-
 // CHECK-LABEL: @test_alu_maxu(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4

>From 9c3b3544174a712968f8b63c9863072aed4dad56 Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Fri, 13 Sep 2024 11:34:07 +1000
Subject: [PATCH 5/8] fix wrongly referenced min/max in riscv_corev_alu.h,
 generate c-api tests. Remove exths/exthz intrinsics by replacing them with
 trunc+extend

---
 clang/include/clang/Basic/BuiltinsRISCVXCV.td |   2 -
 clang/lib/CodeGen/CGBuiltin.cpp               |  20 +-
 clang/lib/Headers/riscv_corev_alu.h           |   8 +-
 clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c | 386 ++++++++++++++++--
 clang/test/CodeGen/RISCV/riscv-xcvalu.c       |  40 +-
 llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td    |   6 -
 llvm/test/CodeGen/RISCV/xcvalu.ll             |  66 ---
 7 files changed, 364 insertions(+), 164 deletions(-)

diff --git a/clang/include/clang/Basic/BuiltinsRISCVXCV.td b/clang/include/clang/Basic/BuiltinsRISCVXCV.td
index 7708082059b75a..06ce07ade5c122 100644
--- a/clang/include/clang/Basic/BuiltinsRISCVXCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCVXCV.td
@@ -23,8 +23,6 @@ let Attributes = [NoThrow, Const] in {
 //===----------------------------------------------------------------------===//
 def alu_slet  : RISCXCVBuiltin<"int(int, int)", "xcvalu">;
 def alu_sletu : RISCXCVBuiltin<"int(unsigned int, unsigned int)", "xcvalu">;
-def alu_minu  : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int)", "xcvalu">;
-def alu_maxu  : RISCXCVBuiltin<"unsigned int(unsigned int, unsigned int)", "xcvalu">;
 def alu_exths : RISCXCVBuiltin<"int(int)", "xcvalu">;
 def alu_exthz : RISCXCVBuiltin<"unsigned int(unsigned int)", "xcvalu">;
 def alu_extbs : RISCXCVBuiltin<"int(int)", "xcvalu">;
diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 9c96adb85df61e..0aa576d9a4a290 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -22364,18 +22364,13 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
   case RISCV::BI__builtin_riscv_cv_alu_extbz:
     ID = Intrinsic::riscv_cv_alu_extbz;
     break;
-  case RISCV::BI__builtin_riscv_cv_alu_exths:
-    ID = Intrinsic::riscv_cv_alu_exths;
-    break;
+  case RISCV::BI__builtin_riscv_cv_alu_exths: {
+    return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
+                              "exths");
+  }
   case RISCV::BI__builtin_riscv_cv_alu_exthz:
-    ID = Intrinsic::riscv_cv_alu_exthz;
-    break;
-  case RISCV::BI__builtin_riscv_cv_alu_maxu:
-    ID = Intrinsic::riscv_cv_alu_maxu;
-    break;
-  case RISCV::BI__builtin_riscv_cv_alu_minu:
-    ID = Intrinsic::riscv_cv_alu_minu;
-    break;
+    return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
+                              "exthz");
   case RISCV::BI__builtin_riscv_cv_alu_slet:
     ID = Intrinsic::riscv_cv_alu_slet;
     break;
@@ -22397,7 +22392,8 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
 
     // Vector builtins are handled from here.
 #include "clang/Basic/riscv_vector_builtin_cg.inc"
-  // SiFive Vector builtins are handled from here.
+
+    // SiFive Vector builtins are handled from here.
 #include "clang/Basic/riscv_sifive_vector_builtin_cg.inc"
   }
 
diff --git a/clang/lib/Headers/riscv_corev_alu.h b/clang/lib/Headers/riscv_corev_alu.h
index 205197fbe4c540..d2832ddf72efeb 100644
--- a/clang/lib/Headers/riscv_corev_alu.h
+++ b/clang/lib/Headers/riscv_corev_alu.h
@@ -34,21 +34,21 @@ __riscv_cv_alu_sletu(unsigned long a, unsigned long b) {
 }
 
 static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_min(long a, long b) {
-  return min(a, b);
+  return __builtin_elementwise_min(a, b);
 }
 
 static __inline__ unsigned long __DEFAULT_FN_ATTRS
 __riscv_cv_alu_minu(unsigned long a, unsigned long b) {
-  return __builtin_riscv_cv_alu_minu(a, b);
+  return __builtin_elementwise_min(a, b);
 }
 
 static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_max(long a, long b) {
-  return max(a, b);
+  return __builtin_elementwise_max(a, b);
 }
 
 static __inline__ unsigned long __DEFAULT_FN_ATTRS
 __riscv_cv_alu_maxu(unsigned long a, unsigned long b) {
-  return __builtin_riscv_cv_alu_maxu(a, b);
+  return __builtin_elementwise_max(a, b);
 }
 
 static __inline__ long __DEFAULT_FN_ATTRS __riscv_cv_alu_exths(int16_t a) {
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
index eb10c817a1f2b4..e6dd38149c090a 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
@@ -5,122 +5,428 @@
 #include <stdint.h>
 #include <riscv_corev_alu.h>
 
-// CHECK-LABEL: @test_alu_slet
-// CHECK: @llvm.riscv.cv.alu.slet
+// CHECK-LABEL: @test_alu_slet(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.riscv.cv.alu.slet(i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    ret i32 [[TMP4]]
+//
 int test_alu_slet(int32_t a, int32_t b) {
   return __riscv_cv_alu_slet(a, b);
 }
 
-// CHECK-LABEL: @test_alu_sletu
-// CHECK: @llvm.riscv.cv.alu.sletu
+// CHECK-LABEL: @test_alu_sletu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.riscv.cv.alu.sletu(i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    ret i32 [[TMP4]]
+//
 int test_alu_sletu(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_sletu(a, b);
 }
 
-// CHECK-LABEL: @test_alu_min
-// CHECK: @min
+// CHECK-LABEL: @test_alu_min(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[ELT_MIN_I:%.*]] = call i32 @llvm.smin.i32(i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    ret i32 [[ELT_MIN_I]]
+//
 int test_alu_min(int32_t a, int32_t b) {
   return __riscv_cv_alu_min(a, b);
 }
 
-// CHECK-LABEL: @test_alu_minu
-// CHECK: @llvm.riscv.cv.alu.minu
+// CHECK-LABEL: @test_alu_minu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[ELT_MIN_I:%.*]] = call i32 @llvm.umin.i32(i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    ret i32 [[ELT_MIN_I]]
+//
 int test_alu_minu(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_minu(a, b);
 }
 
-// CHECK-LABEL: @test_alu_max
-// CHECK: @max
+// CHECK-LABEL: @test_alu_max(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[ELT_MAX_I:%.*]] = call i32 @llvm.smax.i32(i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    ret i32 [[ELT_MAX_I]]
+//
 int test_alu_max(int32_t a, int32_t b) {
   return __riscv_cv_alu_max(a, b);
 }
 
-// CHECK-LABEL: @test_alu_maxu
-// CHECK: @llvm.riscv.cv.alu.maxu
+// CHECK-LABEL: @test_alu_maxu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[ELT_MAX_I:%.*]] = call i32 @llvm.umax.i32(i32 [[TMP2]], i32 [[TMP3]])
+// CHECK-NEXT:    ret i32 [[ELT_MAX_I]]
+//
 int test_alu_maxu(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_maxu(a, b);
 }
 
-// CHECK-LABEL: @test_alu_exths
-// CHECK: @llvm.riscv.cv.alu.exths
+// CHECK-LABEL: @test_alu_exths(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    store i16 [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store i16 [[TMP0]], ptr [[A_ADDR_I]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[A_ADDR_I]], align 2
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK-NEXT:    [[EXTHS_I:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTHS_I]]
+//
 int test_alu_exths(int16_t a) {
   return __riscv_cv_alu_exths(a);
 }
 
-// CHECK-LABEL: @test_alu_exthz
-// CHECK: @llvm.riscv.cv.alu.exthz
+// CHECK-LABEL: @test_alu_exthz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
+// CHECK-NEXT:    store i16 [[A:%.*]], ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
+// CHECK-NEXT:    store i16 [[TMP0]], ptr [[A_ADDR_I]], align 2
+// CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[A_ADDR_I]], align 2
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i16 [[TMP1]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.exthz(i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
 int test_alu_exthz(uint16_t a) {
   return __riscv_cv_alu_exthz(a);
 }
 
-// CHECK-LABEL: @test_alu_extbs
-// CHECK: @llvm.riscv.cv.alu.extbs
+// CHECK-LABEL: @test_alu_extbs(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store i8 [[A:%.*]], ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    store i8 [[TMP0]], ptr [[A_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i8 [[TMP1]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.extbs(i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
 int test_alu_extbs(int8_t a) {
   return __riscv_cv_alu_extbs(a);
 }
 
-// CHECK-LABEL: @test_alu_extbz
-// CHECK: @llvm.riscv.cv.alu.extbz
+// CHECK-LABEL: @test_alu_extbz(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    store i8 [[A:%.*]], ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
+// CHECK-NEXT:    store i8 [[TMP0]], ptr [[A_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP1]] to i32
+// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.extbz(i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP2]]
+//
 int test_alu_extbz(uint8_t a) {
   return __riscv_cv_alu_extbz(a);
 }
 
-// CHECK-LABEL: @test_alu_clip
-// CHECK: @llvm.riscv.cv.alu.clip
+// CHECK-LABEL: @test_alu_clip(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.riscv.cv.alu.clip(i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
 int test_alu_clip(int32_t a) {
   return __riscv_cv_alu_clip(a, 0);
 }
 
-// CHECK-LABEL: @test_alu_clipu
-// CHECK: @llvm.riscv.cv.alu.clipu
+// CHECK-LABEL: @test_alu_clipu(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 0, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = call i32 @llvm.riscv.cv.alu.clipu(i32 [[TMP1]], i32 [[TMP2]])
+// CHECK-NEXT:    ret i32 [[TMP3]]
+//
 int test_alu_clipu(uint32_t a) {
   return __riscv_cv_alu_clipu(a, 0);
 }
 
-// CHECK-LABEL: @test_alu_addN
-// CHECK: @llvm.riscv.cv.alu.addN
+// CHECK-LABEL: @test_alu_addN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SHFT_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    store i8 0, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.riscv.cv.alu.addN(i32 [[TMP2]], i32 [[TMP3]], i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
 int test_alu_addN(int32_t a, int32_t b) {
   return __riscv_cv_alu_addN(a, b, 0);
 }
 
-// CHECK-LABEL: @test_alu_adduN
-// CHECK: @llvm.riscv.cv.alu.adduN
+// CHECK-LABEL: @test_alu_adduN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SHFT_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    store i8 0, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.riscv.cv.alu.adduN(i32 [[TMP2]], i32 [[TMP3]], i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
 int test_alu_adduN(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_adduN(a, b, 0);
 }
 
-// CHECK-LABEL: @test_alu_addRN
-// CHECK: @llvm.riscv.cv.alu.addRN
+// CHECK-LABEL: @test_alu_addRN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SHFT_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    store i8 0, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.riscv.cv.alu.addRN(i32 [[TMP2]], i32 [[TMP3]], i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
 int test_alu_addRN(int32_t a, int32_t b) {
   return __riscv_cv_alu_addRN(a, b, 0);
 }
 
-// CHECK-LABEL: @test_alu_adduRN
-// CHECK: @llvm.riscv.cv.alu.adduRN
+// CHECK-LABEL: @test_alu_adduRN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SHFT_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    store i8 0, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.riscv.cv.alu.adduRN(i32 [[TMP2]], i32 [[TMP3]], i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
 int test_alu_adduRN(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_adduRN(a, b, 0);
 }
 
-// CHECK-LABEL: @test_alu_subN
-// CHECK: @llvm.riscv.cv.alu.subN
+// CHECK-LABEL: @test_alu_subN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SHFT_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    store i8 0, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.riscv.cv.alu.subN(i32 [[TMP2]], i32 [[TMP3]], i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
 int test_alu_subN(int32_t a, int32_t b) {
   return __riscv_cv_alu_subN(a, b, 0);
 }
 
-// CHECK-LABEL: @test_alu_subuN
-// CHECK: @llvm.riscv.cv.alu.subuN
+// CHECK-LABEL: @test_alu_subuN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SHFT_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    store i8 0, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.riscv.cv.alu.subuN(i32 [[TMP2]], i32 [[TMP3]], i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
 int test_alu_subuN(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_subuN(a, b, 0);
 }
 
-// CHECK-LABEL: @test_alu_subRN
-// CHECK: @llvm.riscv.cv.alu.subRN
+// CHECK-LABEL: @test_alu_subRN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SHFT_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    store i8 0, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.riscv.cv.alu.subRN(i32 [[TMP2]], i32 [[TMP3]], i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
 int test_alu_subRN(int32_t a, int32_t b) {
   return __riscv_cv_alu_subRN(a, b, 0);
 }
 
-// CHECK-LABEL: @test_alu_subuRN
-// CHECK: @llvm.riscv.cv.alu.subuRN
+// CHECK-LABEL: @test_alu_subuRN(
+// CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[A_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR_I:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[SHFT_ADDR_I:%.*]] = alloca i8, align 1
+// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
+// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
+// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
+// CHECK-NEXT:    store i32 [[TMP0]], ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    store i8 0, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
+// CHECK-NEXT:    [[TMP4:%.*]] = load i8, ptr [[SHFT_ADDR_I]], align 1
+// CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP4]] to i32
+// CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.riscv.cv.alu.subuRN(i32 [[TMP2]], i32 [[TMP3]], i32 [[CONV_I]])
+// CHECK-NEXT:    ret i32 [[TMP5]]
+//
 int test_alu_subuRN(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_subuRN(a, b, 0);
 }
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu.c b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
index 2445c0bd07fd5b..253c0c09fa852d 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
@@ -46,44 +46,15 @@ int test_alu_sletu(uint32_t a, uint32_t b) {
   return __builtin_riscv_cv_alu_sletu(a, b);
 }
 
-// CHECK-LABEL: @test_alu_minu(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.minu(i32 [[TMP0]], i32 [[TMP1]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
-//
-int test_alu_minu(uint32_t a, uint32_t b) {
-  return __builtin_riscv_cv_alu_minu(a, b);
-}
-
-// CHECK-LABEL: @test_alu_maxu(
-// CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
-// CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
-// CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.maxu(i32 [[TMP0]], i32 [[TMP1]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
-//
-int test_alu_maxu(uint32_t a, uint32_t b) {
-  return __builtin_riscv_cv_alu_maxu(a, b);
-}
-
 // CHECK-LABEL: @test_alu_exths(
 // CHECK-NEXT:  entry:
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i16, align 2
 // CHECK-NEXT:    store i16 [[A:%.*]], ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[TMP0]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.exths(i32 [[CONV]])
-// CHECK-NEXT:    ret i32 [[TMP1]]
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[CONV]] to i16
+// CHECK-NEXT:    [[EXTHS:%.*]] = sext i16 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTHS]]
 //
 int test_alu_exths(int16_t a) {
   return __builtin_riscv_cv_alu_exths(a);
@@ -95,8 +66,9 @@ int test_alu_exths(int16_t a) {
 // CHECK-NEXT:    store i16 [[A:%.*]], ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.exthz(i32 [[CONV]])
-// CHECK-NEXT:    ret i32 [[TMP1]]
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[CONV]] to i16
+// CHECK-NEXT:    [[EXTHS:%.*]] = zext i16 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTHS]]
 //
 int test_alu_exthz(uint16_t a) {
   return __builtin_riscv_cv_alu_exthz(a);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index fedf51cddb0533..fba7d14ff61646 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -795,12 +795,6 @@ let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in {
 
   def : PatCoreVAluGprGpr<"slet", "SLET">;
   def : PatCoreVAluGprGpr<"sletu", "SLETU">;
-  def : PatCoreVAluGprGpr<"min", "MIN">;
-  def : PatCoreVAluGprGpr<"minu", "MINU">;
-  def : PatCoreVAluGprGpr<"max", "MAX">;
-  def : PatCoreVAluGprGpr<"maxu", "MAXU">;
-  def : PatCoreVAluGpr<"exths", "EXTHS">;
-  def : PatCoreVAluGpr<"exthz", "EXTHZ">;
   def : PatCoreVAluGpr<"extbs", "EXTBS">;
   def : PatCoreVAluGpr<"extbz", "EXTBZ">;
 
diff --git a/llvm/test/CodeGen/RISCV/xcvalu.ll b/llvm/test/CodeGen/RISCV/xcvalu.ll
index 0a81fd8dd717d0..69fb394653071e 100644
--- a/llvm/test/CodeGen/RISCV/xcvalu.ll
+++ b/llvm/test/CodeGen/RISCV/xcvalu.ll
@@ -113,72 +113,6 @@ define i32 @test.cv.alu.sletu(i32 %a, i32 %b) {
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.min(i32, i32)
-
-define i32 @test.cv.alu.min(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.min:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cv.min a0, a0, a1
-; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.min(i32 %a, i32 %b)
-  ret i32 %1
-}
-
-declare i32 @llvm.riscv.cv.alu.minu(i32, i32)
-
-define i32 @test.cv.alu.minu(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.minu:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cv.minu a0, a0, a1
-; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.minu(i32 %a, i32 %b)
-  ret i32 %1
-}
-
-declare i32 @llvm.riscv.cv.alu.max(i32, i32)
-
-define i32 @test.cv.alu.max(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.max:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cv.max a0, a0, a1
-; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.max(i32 %a, i32 %b)
-  ret i32 %1
-}
-
-declare i32 @llvm.riscv.cv.alu.maxu(i32, i32)
-
-define i32 @test.cv.alu.maxu(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.maxu:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cv.maxu a0, a0, a1
-; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.maxu(i32 %a, i32 %b)
-  ret i32 %1
-}
-
-declare i32 @llvm.riscv.cv.alu.exths(i32)
-
-define i32 @test.cv.alu.exths(i32 %a) {
-; CHECK-LABEL: test.cv.alu.exths:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cv.exths a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.exths(i32 %a)
-  ret i32 %1
-}
-
-declare i32 @llvm.riscv.cv.alu.exthz(i32)
-
-define i32 @test.cv.alu.exthz(i32 %a) {
-; CHECK-LABEL: test.cv.alu.exthz:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cv.exthz a0, a0
-; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.exthz(i32 %a)
-  ret i32 %1
-}
-
 declare i32 @llvm.riscv.cv.alu.extbs(i32)
 
 define i32 @test.cv.alu.extbs(i32 %a) {

>From 1b56a7fc423c48e6325987a8b2b574eb24711c4a Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Fri, 13 Sep 2024 14:21:56 +1000
Subject: [PATCH 6/8] Generate extb[s/z], sle[u] with plain IR instructions.
 Remove intrinsics that are no longer used.

---
 clang/lib/CodeGen/CGBuiltin.cpp               | 17 ++++----
 clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c | 22 +++++-----
 clang/test/CodeGen/RISCV/riscv-xcvalu.c       | 24 +++++++----
 llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td    |  6 +--
 llvm/test/CodeGen/RISCV/xcvalu.ll             | 40 ++++---------------
 5 files changed, 44 insertions(+), 65 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index 0aa576d9a4a290..c1c97a94948fa2 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -22359,24 +22359,21 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     ID = Intrinsic::riscv_cv_alu_clipu;
     break;
   case RISCV::BI__builtin_riscv_cv_alu_extbs:
-    ID = Intrinsic::riscv_cv_alu_extbs;
-    break;
+    return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
+                              "extbs");
   case RISCV::BI__builtin_riscv_cv_alu_extbz:
-    ID = Intrinsic::riscv_cv_alu_extbz;
-    break;
-  case RISCV::BI__builtin_riscv_cv_alu_exths: {
+    return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int8Ty), Int32Ty,
+                              "extbz");
+  case RISCV::BI__builtin_riscv_cv_alu_exths:
     return Builder.CreateSExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
                               "exths");
-  }
   case RISCV::BI__builtin_riscv_cv_alu_exthz:
     return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
                               "exthz");
   case RISCV::BI__builtin_riscv_cv_alu_slet:
-    ID = Intrinsic::riscv_cv_alu_slet;
-    break;
+    return Builder.CreateICmpSLE(Ops[0], Ops[1], "sle");
   case RISCV::BI__builtin_riscv_cv_alu_sletu:
-    ID = Intrinsic::riscv_cv_alu_sletu;
-    break;
+    return Builder.CreateICmpULE(Ops[0], Ops[1], "sleu");
   case RISCV::BI__builtin_riscv_cv_alu_subN:
     ID = Intrinsic::riscv_cv_alu_subN;
     break;
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
index e6dd38149c090a..ab5a1beef7a015 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
@@ -19,8 +19,9 @@
 // CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.riscv.cv.alu.slet(i32 [[TMP2]], i32 [[TMP3]])
-// CHECK-NEXT:    ret i32 [[TMP4]]
+// CHECK-NEXT:    [[SLE_I:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i1 [[SLE_I]] to i32
+// CHECK-NEXT:    ret i32 [[CONV_I]]
 //
 int test_alu_slet(int32_t a, int32_t b) {
   return __riscv_cv_alu_slet(a, b);
@@ -40,8 +41,9 @@ int test_alu_slet(int32_t a, int32_t b) {
 // CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
-// CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.riscv.cv.alu.sletu(i32 [[TMP2]], i32 [[TMP3]])
-// CHECK-NEXT:    ret i32 [[TMP4]]
+// CHECK-NEXT:    [[SLEU_I:%.*]] = icmp ule i32 [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i1 [[SLEU_I]] to i32
+// CHECK-NEXT:    ret i32 [[CONV_I]]
 //
 int test_alu_sletu(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_sletu(a, b);
@@ -156,8 +158,8 @@ int test_alu_exths(int16_t a) {
 // CHECK-NEXT:    store i16 [[TMP0]], ptr [[A_ADDR_I]], align 2
 // CHECK-NEXT:    [[TMP1:%.*]] = load i16, ptr [[A_ADDR_I]], align 2
 // CHECK-NEXT:    [[CONV_I:%.*]] = zext i16 [[TMP1]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.exthz(i32 [[CONV_I]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[EXTHZ_I:%.*]] = zext i16 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTHZ_I]]
 //
 int test_alu_exthz(uint16_t a) {
   return __riscv_cv_alu_exthz(a);
@@ -172,8 +174,8 @@ int test_alu_exthz(uint16_t a) {
 // CHECK-NEXT:    store i8 [[TMP0]], ptr [[A_ADDR_I]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A_ADDR_I]], align 1
 // CHECK-NEXT:    [[CONV_I:%.*]] = sext i8 [[TMP1]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.extbs(i32 [[CONV_I]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[EXTBS_I:%.*]] = sext i8 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTBS_I]]
 //
 int test_alu_extbs(int8_t a) {
   return __riscv_cv_alu_extbs(a);
@@ -188,8 +190,8 @@ int test_alu_extbs(int8_t a) {
 // CHECK-NEXT:    store i8 [[TMP0]], ptr [[A_ADDR_I]], align 1
 // CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[A_ADDR_I]], align 1
 // CHECK-NEXT:    [[CONV_I:%.*]] = zext i8 [[TMP1]] to i32
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.extbz(i32 [[CONV_I]])
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[EXTBZ_I:%.*]] = zext i8 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTBZ_I]]
 //
 int test_alu_extbz(uint8_t a) {
   return __riscv_cv_alu_extbz(a);
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu.c b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
index 253c0c09fa852d..26e8da68e70b78 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
@@ -18,13 +18,16 @@ int test_abs(int a) {
 
 // CHECK-LABEL: @test_alu_slet(
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.slet(i32 [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    [[SLE:%.*]] = icmp sle i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    store i1 [[SLE]], ptr [[RETVAL]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4
 // CHECK-NEXT:    ret i32 [[TMP2]]
 //
 int test_alu_slet(int32_t a, int32_t b) {
@@ -33,13 +36,16 @@ int test_alu_slet(int32_t a, int32_t b) {
 
 // CHECK-LABEL: @test_alu_sletu(
 // CHECK-NEXT:  entry:
+// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = call i32 @llvm.riscv.cv.alu.sletu(i32 [[TMP0]], i32 [[TMP1]])
+// CHECK-NEXT:    [[SLEU:%.*]] = icmp ule i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    store i1 [[SLEU]], ptr [[RETVAL]], align 4
+// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4
 // CHECK-NEXT:    ret i32 [[TMP2]]
 //
 int test_alu_sletu(uint32_t a, uint32_t b) {
@@ -67,8 +73,8 @@ int test_alu_exths(int16_t a) {
 // CHECK-NEXT:    [[TMP0:%.*]] = load i16, ptr [[A_ADDR]], align 2
 // CHECK-NEXT:    [[CONV:%.*]] = zext i16 [[TMP0]] to i32
 // CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[CONV]] to i16
-// CHECK-NEXT:    [[EXTHS:%.*]] = zext i16 [[TMP1]] to i32
-// CHECK-NEXT:    ret i32 [[EXTHS]]
+// CHECK-NEXT:    [[EXTHZ:%.*]] = zext i16 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTHZ]]
 //
 int test_alu_exthz(uint16_t a) {
   return __builtin_riscv_cv_alu_exthz(a);
@@ -80,8 +86,9 @@ int test_alu_exthz(uint16_t a) {
 // CHECK-NEXT:    store i8 [[A:%.*]], ptr [[A_ADDR]], align 1
 // CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
 // CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP0]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.extbs(i32 [[CONV]])
-// CHECK-NEXT:    ret i32 [[TMP1]]
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[CONV]] to i8
+// CHECK-NEXT:    [[EXTBS:%.*]] = sext i8 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTBS]]
 //
 int test_alu_extbs(int8_t a) {
   return __builtin_riscv_cv_alu_extbs(a);
@@ -93,8 +100,9 @@ int test_alu_extbs(int8_t a) {
 // CHECK-NEXT:    store i8 [[A:%.*]], ptr [[A_ADDR]], align 1
 // CHECK-NEXT:    [[TMP0:%.*]] = load i8, ptr [[A_ADDR]], align 1
 // CHECK-NEXT:    [[CONV:%.*]] = zext i8 [[TMP0]] to i32
-// CHECK-NEXT:    [[TMP1:%.*]] = call i32 @llvm.riscv.cv.alu.extbz(i32 [[CONV]])
-// CHECK-NEXT:    ret i32 [[TMP1]]
+// CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[CONV]] to i8
+// CHECK-NEXT:    [[EXTBZ:%.*]] = zext i8 [[TMP1]] to i32
+// CHECK-NEXT:    ret i32 [[EXTBZ]]
 //
 int test_alu_extbz(uint8_t a) {
   return __builtin_riscv_cv_alu_extbz(a);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
index fba7d14ff61646..b54baa16d9286b 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoXCV.td
@@ -792,11 +792,7 @@ let Predicates = [HasVendorXCValu, IsRV32], AddedComplexity = 1 in {
   def : Pat<(sext_inreg (XLenVT GPR:$rs1), i16), (CV_EXTHS GPR:$rs1)>;
   def : Pat<(sext_inreg (XLenVT GPR:$rs1), i8), (CV_EXTBS GPR:$rs1)>;
   def : Pat<(and (XLenVT GPR:$rs1), 0xffff), (CV_EXTHZ GPR:$rs1)>;
-
-  def : PatCoreVAluGprGpr<"slet", "SLET">;
-  def : PatCoreVAluGprGpr<"sletu", "SLETU">;
-  def : PatCoreVAluGpr<"extbs", "EXTBS">;
-  def : PatCoreVAluGpr<"extbz", "EXTBZ">;
+  def : Pat<(and (XLenVT GPR:$rs1), 0xff), (CV_EXTBZ GPR:$rs1)>;
 
   defm CLIP   : PatCoreVAluGprImm<int_riscv_cv_alu_clip>;
   defm CLIPU  : PatCoreVAluGprImm<int_riscv_cv_alu_clipu>;
diff --git a/llvm/test/CodeGen/RISCV/xcvalu.ll b/llvm/test/CodeGen/RISCV/xcvalu.ll
index 69fb394653071e..54634383dfdac5 100644
--- a/llvm/test/CodeGen/RISCV/xcvalu.ll
+++ b/llvm/test/CodeGen/RISCV/xcvalu.ll
@@ -91,47 +91,23 @@ define i32 @exthz(i16 %a) {
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.slet(i32, i32)
-
-define i32 @test.cv.alu.slet(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.slet:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cv.slet a0, a0, a1
-; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.slet(i32 %a, i32 %b)
-  ret i32 %1
-}
-
-declare i32 @llvm.riscv.cv.alu.sletu(i32, i32)
-
-define i32 @test.cv.alu.sletu(i32 %a, i32 %b) {
-; CHECK-LABEL: test.cv.alu.sletu:
-; CHECK:       # %bb.0:
-; CHECK-NEXT:    cv.sletu a0, a0, a1
-; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.sletu(i32 %a, i32 %b)
-  ret i32 %1
-}
-
-declare i32 @llvm.riscv.cv.alu.extbs(i32)
-
-define i32 @test.cv.alu.extbs(i32 %a) {
-; CHECK-LABEL: test.cv.alu.extbs:
+define i32 @extbs(i8 %a) {
+; CHECK-LABEL: extbs:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $x11 killed $x10
 ; CHECK-NEXT:    cv.extbs a0, a0
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.extbs(i32 %a)
+  %1 = sext i8 %a to i32
   ret i32 %1
 }
 
-declare i32 @llvm.riscv.cv.alu.extbz(i32)
-
-define i32 @test.cv.alu.extbz(i32 %a) {
-; CHECK-LABEL: test.cv.alu.extbz:
+define i32 @extbz(i8 %a) {
+; CHECK-LABEL: extbz:
 ; CHECK:       # %bb.0:
+; CHECK-NEXT:    # kill: def $x11 killed $x10
 ; CHECK-NEXT:    cv.extbz a0, a0
 ; CHECK-NEXT:    ret
-  %1 = call i32 @llvm.riscv.cv.alu.extbz(i32 %a)
+  %1 = zext i8 %a to i32
   ret i32 %1
 }
 

>From 7ee760d95ec28b65285769745631a2b6c6a45eaa Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Thu, 26 Sep 2024 17:13:05 +1000
Subject: [PATCH 7/8] Remove unused intrinsics, fix sle[u] result type by
 zero-extending the compare to i32.

---
 clang/lib/CodeGen/CGBuiltin.cpp               |  6 ++++--
 clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c |  8 ++++----
 clang/test/CodeGen/RISCV/riscv-xcvalu.c       | 16 ++++++----------
 llvm/include/llvm/IR/IntrinsicsRISCVXCV.td    | 10 ----------
 4 files changed, 14 insertions(+), 26 deletions(-)

diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp
index c1c97a94948fa2..1342dca1bf17bd 100644
--- a/clang/lib/CodeGen/CGBuiltin.cpp
+++ b/clang/lib/CodeGen/CGBuiltin.cpp
@@ -22371,9 +22371,11 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
     return Builder.CreateZExt(Builder.CreateTrunc(Ops[0], Int16Ty), Int32Ty,
                               "exthz");
   case RISCV::BI__builtin_riscv_cv_alu_slet:
-    return Builder.CreateICmpSLE(Ops[0], Ops[1], "sle");
+    return Builder.CreateZExt(Builder.CreateICmpSLE(Ops[0], Ops[1]), Int32Ty,
+                              "sle");
   case RISCV::BI__builtin_riscv_cv_alu_sletu:
-    return Builder.CreateICmpULE(Ops[0], Ops[1], "sleu");
+    return Builder.CreateZExt(Builder.CreateICmpULE(Ops[0], Ops[1]), Int32Ty,
+                              "sleu");
   case RISCV::BI__builtin_riscv_cv_alu_subN:
     ID = Intrinsic::riscv_cv_alu_subN;
     break;
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
index ab5a1beef7a015..2c7b6ff9ac2144 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
@@ -19,8 +19,8 @@
 // CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
-// CHECK-NEXT:    [[SLE_I:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
-// CHECK-NEXT:    [[CONV_I:%.*]] = sext i1 [[SLE_I]] to i32
+// CHECK-NEXT:    [[TMP4:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i1 [[TMP4]] to i32
 // CHECK-NEXT:    ret i32 [[CONV_I]]
 //
 int test_alu_slet(int32_t a, int32_t b) {
@@ -41,8 +41,8 @@ int test_alu_slet(int32_t a, int32_t b) {
 // CHECK-NEXT:    store i32 [[TMP1]], ptr [[B_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
-// CHECK-NEXT:    [[SLEU_I:%.*]] = icmp ule i32 [[TMP2]], [[TMP3]]
-// CHECK-NEXT:    [[CONV_I:%.*]] = sext i1 [[SLEU_I]] to i32
+// CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[TMP2]], [[TMP3]]
+// CHECK-NEXT:    [[CONV_I:%.*]] = sext i1 [[TMP4]] to i32
 // CHECK-NEXT:    ret i32 [[CONV_I]]
 //
 int test_alu_sletu(uint32_t a, uint32_t b) {
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu.c b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
index 26e8da68e70b78..e4c2a2c3ca28b0 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu.c
@@ -18,17 +18,15 @@ int test_abs(int a) {
 
 // CHECK-LABEL: @test_alu_slet(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[SLE:%.*]] = icmp sle i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    store i1 [[SLE]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp sle i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SLE:%.*]] = zext i1 [[TMP2]] to i32
+// CHECK-NEXT:    ret i32 [[SLE]]
 //
 int test_alu_slet(int32_t a, int32_t b) {
   return __builtin_riscv_cv_alu_slet(a, b);
@@ -36,17 +34,15 @@ int test_alu_slet(int32_t a, int32_t b) {
 
 // CHECK-LABEL: @test_alu_sletu(
 // CHECK-NEXT:  entry:
-// CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    [[B_ADDR:%.*]] = alloca i32, align 4
 // CHECK-NEXT:    store i32 [[A:%.*]], ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    store i32 [[B:%.*]], ptr [[B_ADDR]], align 4
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A_ADDR]], align 4
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B_ADDR]], align 4
-// CHECK-NEXT:    [[SLEU:%.*]] = icmp ule i32 [[TMP0]], [[TMP1]]
-// CHECK-NEXT:    store i1 [[SLEU]], ptr [[RETVAL]], align 4
-// CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[RETVAL]], align 4
-// CHECK-NEXT:    ret i32 [[TMP2]]
+// CHECK-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[TMP0]], [[TMP1]]
+// CHECK-NEXT:    [[SLEU:%.*]] = zext i1 [[TMP2]] to i32
+// CHECK-NEXT:    ret i32 [[SLEU]]
 //
 int test_alu_sletu(uint32_t a, uint32_t b) {
   return __builtin_riscv_cv_alu_sletu(a, b);
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td b/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
index 02105900d037a3..6e7e90438c6211 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCVXCV.td
@@ -59,16 +59,6 @@ let TargetPrefix = "riscv" in {
                             [IntrNoMem, IntrWillReturn, IntrSpeculatable,
                             ImmArg<ArgIndex<1>>, ImmArg<ArgIndex<2>>]>;
 
-  def int_riscv_cv_alu_slet  : ScalarCoreVAluGprGprIntrinsic;
-  def int_riscv_cv_alu_sletu : ScalarCoreVAluGprGprIntrinsic;
-  def int_riscv_cv_alu_min   : ScalarCoreVAluGprGprIntrinsic;
-  def int_riscv_cv_alu_minu  : ScalarCoreVAluGprGprIntrinsic;
-  def int_riscv_cv_alu_max   : ScalarCoreVAluGprGprIntrinsic;
-  def int_riscv_cv_alu_maxu  : ScalarCoreVAluGprGprIntrinsic;
-  def int_riscv_cv_alu_exths : ScalarCoreVAluGprIntrinsic;
-  def int_riscv_cv_alu_exthz : ScalarCoreVAluGprIntrinsic;
-  def int_riscv_cv_alu_extbs : ScalarCoreVAluGprIntrinsic;
-  def int_riscv_cv_alu_extbz : ScalarCoreVAluGprIntrinsic;
   def int_riscv_cv_alu_clip   : ScalarCoreVAluGprGprIntrinsic;
   def int_riscv_cv_alu_clipu  : ScalarCoreVAluGprGprIntrinsic;
   def int_riscv_cv_alu_addN   : ScalarCoreVAluGprGprGprIntrinsic;

>From 71d2b9253e2917af27176619fd5f8a500b7bc1f4 Mon Sep 17 00:00:00 2001
From: Qihan Cai <caiqihan021 at hotmail.com>
Date: Fri, 27 Sep 2024 17:43:27 +1000
Subject: [PATCH 8/8] Fix xcvalu-c-api, alphabetize cmake list

---
 clang/lib/Headers/CMakeLists.txt              | 2 +-
 clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index a2ea1d80175a3a..ff392e7122a448 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -120,10 +120,10 @@ set(ppc_htm_files
 
 set(riscv_files
   riscv_bitmanip.h
+  riscv_corev_alu.h
   riscv_crypto.h
   riscv_ntlh.h
   sifive_vector.h
-  riscv_corev_alu.h
   )
 
 set(systemz_files
diff --git a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
index 2c7b6ff9ac2144..b4690a5f1c1ca5 100644
--- a/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
+++ b/clang/test/CodeGen/RISCV/riscv-xcvalu-c-api.c
@@ -20,8 +20,8 @@
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = icmp sle i32 [[TMP2]], [[TMP3]]
-// CHECK-NEXT:    [[CONV_I:%.*]] = sext i1 [[TMP4]] to i32
-// CHECK-NEXT:    ret i32 [[CONV_I]]
+// CHECK-NEXT:    [[SLE_I:%.*]] = zext i1 [[TMP4]] to i32
+// CHECK-NEXT:    ret i32 [[SLE_I]]
 //
 int test_alu_slet(int32_t a, int32_t b) {
   return __riscv_cv_alu_slet(a, b);
@@ -42,8 +42,8 @@ int test_alu_slet(int32_t a, int32_t b) {
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[A_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[B_ADDR_I]], align 4
 // CHECK-NEXT:    [[TMP4:%.*]] = icmp ule i32 [[TMP2]], [[TMP3]]
-// CHECK-NEXT:    [[CONV_I:%.*]] = sext i1 [[TMP4]] to i32
-// CHECK-NEXT:    ret i32 [[CONV_I]]
+// CHECK-NEXT:    [[SLEU_I:%.*]] = zext i1 [[TMP4]] to i32
+// CHECK-NEXT:    ret i32 [[SLEU_I]]
 //
 int test_alu_sletu(uint32_t a, uint32_t b) {
   return __riscv_cv_alu_sletu(a, b);



More information about the cfe-commits mailing list