[clang] [llvm] [RISCV] Preliminary P-ext intrinsics support (PR #174068)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Dec 30 23:42:20 PST 2025
llvmbot wrote:
@llvm/pr-subscribers-backend-risc-v
Author: SiHuaN (sihuan)
This patch adds initial intrinsic support for the RISC-V P extension, introducing padd and psub operations.
The implementation is based on the `Packed Addition and Subtraction` section of the P extension intrinsics specification.
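For context, here is a minimal usage sketch of the new builtins. The typedefs and builtin names are taken from the Clang test added in this patch, and it assumes the `experimental-p` target feature is enabled, as in the RUN lines below:

```c
#include <stdint.h>

typedef int8_t  v4i8  __attribute__((vector_size(4)));
typedef int16_t v2i16 __attribute__((vector_size(4)));

// Packed, element-wise add/sub on 32-bit packed vectors.
v4i8 add_bytes(v4i8 a, v4i8 b) {
  return __builtin_riscv_padd_v4i8(a, b);
}

v2i16 sub_halves(v2i16 a, v2i16 b) {
  return __builtin_riscv_psub_v2i16(a, b);
}
```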
---
Patch is 29.71 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/174068.diff
9 Files Affected:
- (modified) clang/include/clang/Basic/BuiltinsRISCV.td (+17)
- (modified) clang/lib/CodeGen/TargetBuiltins/RISCV.cpp (+28-3)
- (added) clang/test/CodeGen/RISCV/rvp-intrinsics.c (+223)
- (modified) llvm/include/llvm/IR/IntrinsicsRISCV.td (+12)
- (modified) llvm/lib/Target/RISCV/RISCVCallingConv.cpp (+46-29)
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+5)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoP.td (+36)
- (modified) llvm/lib/Target/RISCV/RISCVRegisterInfo.td (+12-2)
- (added) llvm/test/CodeGen/RISCV/rvp-intrinsics.ll (+135)
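The new `llvm.riscv.padd`/`llvm.riscv.psub` intrinsics are overloaded on the vector type. As a rough reference model only: packed addition is assumed here to operate element-wise with wrap-around; that behaviour is not restated in this excerpt, so treat the sketch below as an illustration, not a specification.

```c
#include <stdint.h>

// Hypothetical reference model of padd on a <4 x i8> packed into a 32-bit word:
// element-wise, modulo-256 addition (wrap-around semantics are an assumption).
static inline uint32_t ref_padd_v4i8(uint32_t a, uint32_t b) {
  uint32_t r = 0;
  for (int i = 0; i < 4; ++i) {
    uint8_t ea = (a >> (8 * i)) & 0xff;
    uint8_t eb = (b >> (8 * i)) & 0xff;
    r |= (uint32_t)(uint8_t)(ea + eb) << (8 * i);
  }
  return r;
}
```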
``````````diff
diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 2dad5ede2d64b..1c43371cd52fc 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -137,6 +137,23 @@ def sm3p0 : RISCVBuiltin<"unsigned int(unsigned int)">;
def sm3p1 : RISCVBuiltin<"unsigned int(unsigned int)">;
} // Features = "zksh"
+//===----------------------------------------------------------------------===//
+// P extension.
+//===----------------------------------------------------------------------===//
+let Features = "experimental-p" in {
+def padd_v4i8 : RISCVBuiltin<"_Vector<4, char>(_Vector<4, char>, _Vector<4, char>)">;
+def padd_v2i16 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">;
+def padd_v8i8 : RISCVBuiltin<"_Vector<8, char>(_Vector<8, char>, _Vector<8, char>)">;
+def padd_v4i16 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">;
+def padd_v2i32 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">;
+
+def psub_v4i8 : RISCVBuiltin<"_Vector<4, char>(_Vector<4, char>, _Vector<4, char>)">;
+def psub_v2i16 : RISCVBuiltin<"_Vector<2, short>(_Vector<2, short>, _Vector<2, short>)">;
+def psub_v8i8 : RISCVBuiltin<"_Vector<8, char>(_Vector<8, char>, _Vector<8, char>)">;
+def psub_v4i16 : RISCVBuiltin<"_Vector<4, short>(_Vector<4, short>, _Vector<4, short>)">;
+def psub_v2i32 : RISCVBuiltin<"_Vector<2, int>(_Vector<2, int>, _Vector<2, int>)">;
+} // Features = "experimental-p"
+
} // Attributes = [Const, NoThrow]
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 2e11037f0dcd0..8cc8b03db0137 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -1143,7 +1143,17 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
case RISCV::BI__builtin_riscv_brev8_32:
case RISCV::BI__builtin_riscv_brev8_64:
case RISCV::BI__builtin_riscv_zip_32:
- case RISCV::BI__builtin_riscv_unzip_32: {
+ case RISCV::BI__builtin_riscv_unzip_32:
+ case RISCV::BI__builtin_riscv_padd_v4i8:
+ case RISCV::BI__builtin_riscv_padd_v2i16:
+ case RISCV::BI__builtin_riscv_padd_v8i8:
+ case RISCV::BI__builtin_riscv_padd_v4i16:
+ case RISCV::BI__builtin_riscv_padd_v2i32:
+ case RISCV::BI__builtin_riscv_psub_v4i8:
+ case RISCV::BI__builtin_riscv_psub_v2i16:
+ case RISCV::BI__builtin_riscv_psub_v8i8:
+ case RISCV::BI__builtin_riscv_psub_v4i16:
+ case RISCV::BI__builtin_riscv_psub_v2i32: {
switch (BuiltinID) {
default: llvm_unreachable("unexpected builtin ID");
// Zbb
@@ -1187,11 +1197,26 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
case RISCV::BI__builtin_riscv_unzip_32:
ID = Intrinsic::riscv_unzip;
break;
- }
+ // P extension
+ case RISCV::BI__builtin_riscv_padd_v4i8:
+ case RISCV::BI__builtin_riscv_padd_v2i16:
+ case RISCV::BI__builtin_riscv_padd_v8i8:
+ case RISCV::BI__builtin_riscv_padd_v4i16:
+ case RISCV::BI__builtin_riscv_padd_v2i32:
+ ID = Intrinsic::riscv_padd;
+ break;
+ case RISCV::BI__builtin_riscv_psub_v4i8:
+ case RISCV::BI__builtin_riscv_psub_v2i16:
+ case RISCV::BI__builtin_riscv_psub_v8i8:
+ case RISCV::BI__builtin_riscv_psub_v4i16:
+ case RISCV::BI__builtin_riscv_psub_v2i32:
+ ID = Intrinsic::riscv_psub;
+ break;
+ }
+ }
IntrinsicTypes = {ResultType};
break;
- }
// Zk builtins
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
new file mode 100644
index 0000000000000..2d047f2438e8b
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -0,0 +1,223 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
+// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-p -emit-llvm %s -O2 -o - | FileCheck %s --check-prefix=RV32
+// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-p -emit-llvm %s -O2 -o - | FileCheck %s --check-prefix=RV64
+
+#include <stdint.h>
+
+typedef int8_t v4i8 __attribute__((vector_size(4)));
+typedef int16_t v2i16 __attribute__((vector_size(4)));
+typedef int8_t v8i8 __attribute__((vector_size(8)));
+typedef int16_t v4i16 __attribute__((vector_size(8)));
+typedef int32_t v2i32 __attribute__((vector_size(8)));
+
+// RV32-LABEL: @test_padd_v4i8(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.padd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v4i8(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[A_COERCE:%.*]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II]] to <4 x i8>
+// RV64-NEXT: [[COERCE_VAL_II1:%.*]] = trunc i64 [[B_COERCE:%.*]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1]] to <4 x i8>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.padd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+v4i8 test_padd_v4i8(v4i8 a, v4i8 b) {
+ return __builtin_riscv_padd_v4i8(a, b);
+}
+
+// RV32-LABEL: @test_padd_v2i16(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.padd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v2i16(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[A_COERCE:%.*]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II]] to <2 x i16>
+// RV64-NEXT: [[COERCE_VAL_II1:%.*]] = trunc i64 [[B_COERCE:%.*]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1]] to <2 x i16>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.padd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+v2i16 test_padd_v2i16(v2i16 a, v2i16 b) {
+ return __builtin_riscv_padd_v2i16(a, b);
+}
+
+// RV32-LABEL: @test_padd_v8i8(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.padd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v8i8(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.padd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+v8i8 test_padd_v8i8(v8i8 a, v8i8 b) {
+ return __builtin_riscv_padd_v8i8(a, b);
+}
+
+// RV32-LABEL: @test_padd_v4i16(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.padd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v4i16(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.padd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+v4i16 test_padd_v4i16(v4i16 a, v4i16 b) {
+ return __builtin_riscv_padd_v4i16(a, b);
+}
+
+// RV32-LABEL: @test_padd_v2i32(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.padd.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_padd_v2i32(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.padd.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+v2i32 test_padd_v2i32(v2i32 a, v2i32 b) {
+ return __builtin_riscv_padd_v2i32(a, b);
+}
+
+// RV32-LABEL: @test_psub_v4i8(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <4 x i8>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.psub.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v4i8(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[A_COERCE:%.*]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II]] to <4 x i8>
+// RV64-NEXT: [[COERCE_VAL_II1:%.*]] = trunc i64 [[B_COERCE:%.*]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1]] to <4 x i8>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i8> @llvm.riscv.psub.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+v4i8 test_psub_v4i8(v4i8 a, v4i8 b) {
+ return __builtin_riscv_psub_v4i8(a, b);
+}
+
+// RV32-LABEL: @test_psub_v2i16(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE:%.*]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE:%.*]] to <2 x i16>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.psub.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v2i16(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[COERCE_VAL_II:%.*]] = trunc i64 [[A_COERCE:%.*]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II]] to <2 x i16>
+// RV64-NEXT: [[COERCE_VAL_II1:%.*]] = trunc i64 [[B_COERCE:%.*]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1]] to <2 x i16>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i16> @llvm.riscv.psub.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+v2i16 test_psub_v2i16(v2i16 a, v2i16 b) {
+ return __builtin_riscv_psub_v2i16(a, b);
+}
+
+// RV32-LABEL: @test_psub_v8i8(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.psub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v8i8(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <8 x i8>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <8 x i8> @llvm.riscv.psub.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+v8i8 test_psub_v8i8(v8i8 a, v8i8 b) {
+ return __builtin_riscv_psub_v8i8(a, b);
+}
+
+// RV32-LABEL: @test_psub_v4i16(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.psub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v4i16(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <4 x i16>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <4 x i16> @llvm.riscv.psub.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+v4i16 test_psub_v4i16(v4i16 a, v4i16 b) {
+ return __builtin_riscv_psub_v4i16(a, b);
+}
+
+// RV32-LABEL: @test_psub_v2i32(
+// RV32-NEXT: entry:
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32>
+// RV32-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.psub.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: @test_psub_v2i32(
+// RV64-NEXT: entry:
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE:%.*]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE:%.*]] to <2 x i32>
+// RV64-NEXT: [[TMP2:%.*]] = tail call <2 x i32> @llvm.riscv.psub.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i32> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+v2i32 test_psub_v2i32(v2i32 a, v2i32 b) {
+ return __builtin_riscv_psub_v2i32(a, b);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index 9088e5e6a357b..c35e09e372e89 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -1978,6 +1978,18 @@ let TargetPrefix = "riscv" in {
defm vfncvt_sat_f_f_q_alt : RISCVConversionRoundingMode;
} // TargetPrefix = "riscv"
+//===----------------------------------------------------------------------===//
+// Packed SIMD (P) Extension
+
+let TargetPrefix = "riscv" in {
+ def int_riscv_padd : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+ def int_riscv_psub : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>],
+ [IntrNoMem]>;
+}
+
// Vendor extensions
//===----------------------------------------------------------------------===//
include "llvm/IR/IntrinsicsRISCVXTHead.td"
diff --git a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
index 78f47794a5b66..c8688d8aefaf3 100644
--- a/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
+++ b/llvm/lib/Target/RISCV/RISCVCallingConv.cpp
@@ -545,37 +545,53 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
unsigned StoreSizeBytes = XLen / 8;
Align StackAlign = Align(XLen / 8);
+ static const MCPhysReg ArgGPRPairs[] = {RISCV::X10_X11, RISCV::X12_X13,
+ RISCV::X14_X15, RISCV::X16_X17};
+
if (ValVT.isVector() || ValVT.isRISCVVectorTuple()) {
- Reg = allocateRVVReg(ValVT, ValNo, State, TLI);
- if (Reg) {
- // Fixed-length vectors are located in the corresponding scalable-vector
- // container types.
- if (ValVT.isFixedLengthVector()) {
- LocVT = TLI.getContainerForFixedLengthVector(LocVT);
- State.addLoc(
- CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
- return false;
- }
- } else {
- // For return values, the vector must be passed fully via registers or
- // via the stack.
- // FIXME: The proposed vector ABI only mandates v8-v15 for return values,
- // but we're using all of them.
- if (IsRet)
- return true;
- // Try using a GPR to pass the address
- if ((Reg = State.AllocateReg(ArgGPRs))) {
- LocVT = XLenVT;
- LocInfo = CCValAssign::Indirect;
- } else if (ValVT.isScalableVector()) {
- LocVT = XLenVT;
- LocInfo = CCValAssign::Indirect;
+ bool IsPVectorInGPR = false;
+ if (Subtarget.enablePExtSIMDCodeGen() && ValVT.isVector()) {
+ const TargetRegisterClass *RC = TLI.getRegClassFor(ValVT);
+ if (RC == &RISCV::GPRRegClass || RC == &RISCV::GPRPairRegClass)
+ IsPVectorInGPR = true;
+ }
+
+ if (!IsPVectorInGPR) {
+ Reg = allocateRVVReg(ValVT, ValNo, State, TLI);
+ if (Reg) {
+ // Fixed-length vectors are located in the corresponding scalable-vector
+ // container types.
+ if (ValVT.isFixedLengthVector()) {
+ LocVT = TLI.getContainerForFixedLengthVector(LocVT);
+ State.addLoc(
+ CCValAssign::getCustomReg(ValNo, ValVT, Reg, LocVT, LocInfo));
+ return false;
+ }
} else {
- StoreSizeBytes = ValVT.getStoreSize();
- // Align vectors to their element sizes, being careful for vXi1
- // vectors.
- StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
+ // For return values, the vector must be passed fully via registers or
+ // via the stack.
+ // FIXME: The proposed vector ABI only mandates v8-v15 for return
+ // values, but we're using all of them.
+ if (IsRet)
+ return true;
+ // Try using a GPR to pass the address
+ if ((Reg = State.AllocateReg(ArgGPRs))) {
+ LocVT = XLenVT;
+ LocInfo = CCValAssign::Indirect;
+ } else if (ValVT.isScalableVector()) {
+ LocVT = XLenVT;
+ LocInfo = CCValAssign::Indirect;
+ } else {
+ StoreSizeBytes = ValVT.getStoreSize();
+ // Align vectors to their element sizes, being careful for vXi1
+ // vectors.
+ StackAlign = MaybeAlign(ValVT.getScalarSizeInBits() / 8).valueOrOne();
+ }
}
+ } else if (XLen == 32 && ValVT.getSizeInBits() == 64) {
+ Reg = State.AllocateReg(ArgGPRPairs);
+ } else {
+ Reg = State.AllocateReg(ArgGPRs);
}
} else {
Reg = State.AllocateReg(ArgGPRs);
@@ -604,7 +620,8 @@ bool llvm::CC_RISCV(unsigned ValNo, MVT ValVT, MVT LocVT,
assert(((ValVT.isFloatingPoint() && !ValVT.isVector()) || LocVT == XLenVT ||
(TLI.getSubtarget().hasVInstructions() &&
- (ValVT.isVector() || ValVT.isRISCVVectorTuple()))) &&
+ (ValVT.isVector() || ValVT.isRISCVVectorTuple())) ||
+ (Subtarget.enablePExtSIMDCodeGen() && ValVT.isVector())) &&
"Expected an XLenVT or vector types at this stage");
if (Reg) {
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index c60f740d37576..d084c1cfdc8b4 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -293,9 +293,14 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT...
[truncated]
``````````
https://github.com/llvm/llvm-project/pull/174068