[clang] [llvm] [RISCV][P-ext] Support Packed Absolute Value and Absolute Difference (PR #203840)
via cfe-commits
cfe-commits at lists.llvm.org
Tue Jun 16 22:10:40 PDT 2026
https://github.com/TelGome updated https://github.com/llvm/llvm-project/pull/203840
>From 9d96c0344c1659a05d740ac33dc0c64b84ce39a8 Mon Sep 17 00:00:00 2001
From: Dongyan Chen <chendongyan at isrc.iscas.ac.cn>
Date: Mon, 15 Jun 2026 07:37:49 +0000
Subject: [PATCH 1/3] [RISCV][P-ext] Support Packed Absolute Value and Absolute
Difference
---
clang/include/clang/Basic/BuiltinsRISCV.td | 16 ++
clang/lib/CodeGen/TargetBuiltins/RISCV.cpp | 33 ++-
clang/lib/Headers/riscv_packed_simd.h | 29 ++
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 256 ++++++++++++++++++
.../riscv_packed_simd.c | 70 +++++
llvm/include/llvm/IR/IntrinsicsRISCV.td | 12 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 36 ++-
llvm/test/CodeGen/RISCV/rvp-simd-32.ll | 54 ++++
llvm/test/CodeGen/RISCV/rvp-simd-64.ll | 85 ++++++
9 files changed, 587 insertions(+), 4 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 185269bfc6d85..4cea2612fffdd 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -181,6 +181,22 @@ def pasubu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned ch
def pasubu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>)">;
def pasubu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned int>, _Vector<2, unsigned int>)">;
+// Packed Absolute Value and Absolute Difference (32-bit)
+def pabs_i8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, signed char>)">;
+def pabs_i16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, short>)">;
+def pabd_i8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, signed char>, _Vector<4, signed char>)">;
+def pabd_i16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, short>, _Vector<2, short>)">;
+def pabdu_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned char>, _Vector<4, unsigned char>)">;
+def pabdu_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, unsigned short>, _Vector<2, unsigned short>)">;
+
+// Packed Absolute Value and Absolute Difference (64-bit)
+def pabs_i8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, signed char>)">;
+def pabs_i16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, short>)">;
+def pabd_i8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, signed char>, _Vector<8, signed char>)">;
+def pabd_i16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, short>, _Vector<4, short>)">;
+def pabdu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned char>, _Vector<8, unsigned char>)">;
+def pabdu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned short>, _Vector<4, unsigned short>)">;
+
} // Features = "experimental-p"
//===----------------------------------------------------------------------===//
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 8c0684110dad7..44ec84f741b29 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -1219,7 +1219,20 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
case RISCV::BI__builtin_riscv_pasubu_u16x2:
case RISCV::BI__builtin_riscv_pasubu_u8x8:
case RISCV::BI__builtin_riscv_pasubu_u16x4:
- case RISCV::BI__builtin_riscv_pasubu_u32x2: {
+ case RISCV::BI__builtin_riscv_pasubu_u32x2:
+ // Packed Absolute Value and Absolute Difference
+ case RISCV::BI__builtin_riscv_pabs_i8x4:
+ case RISCV::BI__builtin_riscv_pabs_i16x2:
+ case RISCV::BI__builtin_riscv_pabs_i8x8:
+ case RISCV::BI__builtin_riscv_pabs_i16x4:
+ case RISCV::BI__builtin_riscv_pabd_i8x4:
+ case RISCV::BI__builtin_riscv_pabd_i16x2:
+ case RISCV::BI__builtin_riscv_pabd_i8x8:
+ case RISCV::BI__builtin_riscv_pabd_i16x4:
+ case RISCV::BI__builtin_riscv_pabdu_u8x4:
+ case RISCV::BI__builtin_riscv_pabdu_u16x2:
+ case RISCV::BI__builtin_riscv_pabdu_u8x8:
+ case RISCV::BI__builtin_riscv_pabdu_u16x4: {
switch (BuiltinID) {
default:
llvm_unreachable("unexpected builtin ID");
@@ -1251,6 +1264,24 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
case RISCV::BI__builtin_riscv_pasubu_u32x2:
ID = Intrinsic::riscv_pasubu;
break;
+ case RISCV::BI__builtin_riscv_pabs_i8x4:
+ case RISCV::BI__builtin_riscv_pabs_i16x2:
+ case RISCV::BI__builtin_riscv_pabs_i8x8:
+ case RISCV::BI__builtin_riscv_pabs_i16x4:
+ ID = Intrinsic::riscv_pabs;
+ break;
+ case RISCV::BI__builtin_riscv_pabd_i8x4:
+ case RISCV::BI__builtin_riscv_pabd_i16x2:
+ case RISCV::BI__builtin_riscv_pabd_i8x8:
+ case RISCV::BI__builtin_riscv_pabd_i16x4:
+ ID = Intrinsic::riscv_pabd;
+ break;
+ case RISCV::BI__builtin_riscv_pabdu_u8x4:
+ case RISCV::BI__builtin_riscv_pabdu_u16x2:
+ case RISCV::BI__builtin_riscv_pabdu_u8x8:
+ case RISCV::BI__builtin_riscv_pabdu_u16x4:
+ ID = Intrinsic::riscv_pabdu;
+ break;
}
IntrinsicTypes = {ResultType};
diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h
index 7e981c91ec3e1..b178c74e0635d 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -92,6 +92,17 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
return (rty)(__rs1 op __rs2); \
}
+#define __packed_unary_builtin_cast(name, ty, rty, builtin) \
+ static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \
+ return (rty)builtin(__rs1); \
+ }
+
+#define __packed_binary_builtin_cast(name, ty, rty, builtin) \
+ static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
+ ty __rs2) { \
+ return (rty)builtin(__rs1, __rs2); \
+ }
+
// clang-format off: macro call sites have no trailing semicolons, which
// confuses clang-format into a deeply nested expression.
@@ -375,6 +386,22 @@ __packed_binary_builtin(pasubu_u8x8, uint8x8_t, __builtin_riscv_pasubu_u8x8)
__packed_binary_builtin(pasubu_u16x4, uint16x4_t, __builtin_riscv_pasubu_u16x4)
__packed_binary_builtin(pasubu_u32x2, uint32x2_t, __builtin_riscv_pasubu_u32x2)
+/* Packed Absolute Value and Absolute Difference (32-bit) */
+__packed_unary_builtin_cast(pabs_i8x4, int8x4_t, uint8x4_t, __builtin_riscv_pabs_i8x4)
+__packed_unary_builtin_cast(pabs_i16x2, int16x2_t, uint16x2_t, __builtin_riscv_pabs_i16x2)
+__packed_binary_builtin_cast(pabd_i8x4, int8x4_t, uint8x4_t, __builtin_riscv_pabd_i8x4)
+__packed_binary_builtin_cast(pabd_i16x2, int16x2_t, uint16x2_t, __builtin_riscv_pabd_i16x2)
+__packed_binary_builtin_cast(pabdu_u8x4, uint8x4_t, uint8x4_t, __builtin_riscv_pabdu_u8x4)
+__packed_binary_builtin_cast(pabdu_u16x2, uint16x2_t, uint16x2_t, __builtin_riscv_pabdu_u16x2)
+
+/* Packed Absolute Value and Absolute Difference (64-bit) */
+__packed_unary_builtin_cast(pabs_i8x8, int8x8_t, uint8x8_t, __builtin_riscv_pabs_i8x8)
+__packed_unary_builtin_cast(pabs_i16x4, int16x4_t, uint16x4_t, __builtin_riscv_pabs_i16x4)
+__packed_binary_builtin_cast(pabd_i8x8, int8x8_t, uint8x8_t, __builtin_riscv_pabd_i8x8)
+__packed_binary_builtin_cast(pabd_i16x4, int16x4_t, uint16x4_t, __builtin_riscv_pabd_i16x4)
+__packed_binary_builtin_cast(pabdu_u8x8, uint8x8_t, uint8x8_t, __builtin_riscv_pabdu_u8x8)
+__packed_binary_builtin_cast(pabdu_u16x4, uint16x4_t, uint16x4_t, __builtin_riscv_pabdu_u16x4)
+
// clang-format on
#undef __packed_splat2
@@ -392,6 +419,8 @@ __packed_binary_builtin(pasubu_u32x2, uint32x2_t, __builtin_riscv_pasubu_u32x2)
#undef __packed_sh1add
#undef __packed_sh1sadd
#undef __packed_cmp
+#undef __packed_unary_builtin_cast
+#undef __packed_binary_builtin_cast
#undef __DEFAULT_FN_ATTRS
#if defined(__cplusplus)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index 363bfa5f5d995..f4e949084b9f9 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -5235,3 +5235,259 @@ uint16x4_t test_pasubu_u16x4(uint16x4_t rs1, uint16x4_t rs2) {
uint32x2_t test_pasubu_u32x2(uint32x2_t rs1, uint32x2_t rs2) {
return __riscv_pasubu_u32x2(rs1, rs2);
}
+
+// RV32-LABEL: define dso_local i32 @test_pabs_i8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.riscv.pabs.v4i8(<4 x i8> [[TMP0]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabs_i8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.riscv.pabs.v4i8(<4 x i8> [[TMP0]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint8x4_t test_pabs_i8x4(int8x4_t rs1) {
+ return __riscv_pabs_i8x4(rs1);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabs_i16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.riscv.pabs.v2i16(<2 x i16> [[TMP0]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabs_i16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.riscv.pabs.v2i16(<2 x i16> [[TMP0]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint16x2_t test_pabs_i16x2(int16x2_t rs1) {
+ return __riscv_pabs_i16x2(rs1);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabd_i8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabd_i8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT: ret i32 [[TMP3]]
+//
+uint8x4_t test_pabd_i8x4(int8x4_t rs1, int8x4_t rs2) {
+ return __riscv_pabd_i8x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabd_i16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabd_i16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT: ret i32 [[TMP3]]
+//
+uint16x2_t test_pabd_i16x2(int16x2_t rs1, int16x2_t rs2) {
+ return __riscv_pabd_i16x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabdu_u8x4(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabdu_u8x4(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP2:%.*]] = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[TMP2]] to i32
+// RV64-NEXT: ret i32 [[TMP3]]
+//
+uint8x4_t test_pabdu_u8x4(uint8x4_t rs1, uint8x4_t rs2) {
+ return __riscv_pabdu_u8x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pabdu_u16x2(
+// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i32 @test_pabdu_u16x2(
+// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]], i32 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[RS2_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP2:%.*]] = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[TMP2]] to i32
+// RV64-NEXT: ret i32 [[TMP3]]
+//
+uint16x2_t test_pabdu_u16x2(uint16x2_t rs1, uint16x2_t rs2) {
+ return __riscv_pabdu_u16x2(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabs_i8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.riscv.pabs.v8i8(<8 x i8> [[TMP0]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabs_i8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.riscv.pabs.v8i8(<8 x i8> [[TMP0]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint8x8_t test_pabs_i8x8(int8x8_t rs1) {
+ return __riscv_pabs_i8x8(rs1);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabs_i16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.riscv.pabs.v4i16(<4 x i16> [[TMP0]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabs_i16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.riscv.pabs.v4i16(<4 x i16> [[TMP0]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_pabs_i16x4(int16x4_t rs1) {
+ return __riscv_pabs_i16x4(rs1);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabd_i8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabd_i8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+uint8x8_t test_pabd_i8x8(int8x8_t rs1, int8x8_t rs2) {
+ return __riscv_pabd_i8x8(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabd_i16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabd_i16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+uint16x4_t test_pabd_i16x4(int16x4_t rs1, int16x4_t rs2) {
+ return __riscv_pabd_i16x4(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabdu_u8x8(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabdu_u8x8(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP2:%.*]] = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+uint8x8_t test_pabdu_u8x8(uint8x8_t rs1, uint8x8_t rs2) {
+ return __riscv_pabdu_u8x8(rs1, rs2);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pabdu_u16x4(
+// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_pabdu_u16x4(
+// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]], i64 noundef [[RS2_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[RS2_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP2:%.*]] = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+uint16x4_t test_pabdu_u16x4(uint16x4_t rs1, uint16x4_t rs2) {
+ return __riscv_pabdu_u16x4(rs1, rs2);
+}
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
index edbc56ce0e199..e9f90fc17e23d 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
@@ -1772,3 +1772,73 @@ uint16x4_t test_pasubu_u16x4(uint16x4_t a, uint16x4_t b) {
uint32x2_t test_pasubu_u32x2(uint32x2_t a, uint32x2_t b) {
return __riscv_pasubu_u32x2(a, b);
}
+
+// CHECK-LABEL: test_pabs_i8x4:
+// CHECK: pabs.b
+uint8x4_t test_pabs_i8x4(int8x4_t a) { return __riscv_pabs_i8x4(a); }
+
+// CHECK-LABEL: test_pabs_i16x2:
+// CHECK: pabs.h
+uint16x2_t test_pabs_i16x2(int16x2_t a) { return __riscv_pabs_i16x2(a); }
+
+// CHECK-LABEL: test_pabd_i8x4:
+// CHECK: pabd.b
+uint8x4_t test_pabd_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pabd_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_pabd_i16x2:
+// CHECK: pabd.h
+uint16x2_t test_pabd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pabd_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_pabdu_u8x4:
+// CHECK: pabdu.b
+uint8x4_t test_pabdu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pabdu_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_pabdu_u16x2:
+// CHECK: pabdu.h
+uint16x2_t test_pabdu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pabdu_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pabs_i8x8:
+// RV32: pabs.db
+// RV64: pabs.b
+uint8x8_t test_pabs_i8x8(int8x8_t a) { return __riscv_pabs_i8x8(a); }
+
+// CHECK-LABEL: test_pabs_i16x4:
+// RV32: pabs.dh
+// RV64: pabs.h
+uint16x4_t test_pabs_i16x4(int16x4_t a) { return __riscv_pabs_i16x4(a); }
+
+// CHECK-LABEL: test_pabd_i8x8:
+// RV32: pabd.db
+// RV64: pabd.b
+uint8x8_t test_pabd_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pabd_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_pabd_i16x4:
+// RV32: pabd.dh
+// RV64: pabd.h
+uint16x4_t test_pabd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pabd_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pabdu_u8x8:
+// RV32: pabdu.db
+// RV64: pabdu.b
+uint8x8_t test_pabdu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pabdu_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_pabdu_u16x4:
+// RV32: pabdu.dh
+// RV64: pabdu.h
+uint16x4_t test_pabdu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pabdu_u16x4(a, b);
+}
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index b2add44b19a5e..aa8a20cf52b62 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -2046,16 +2046,26 @@ def int_riscv_pause : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffec
// Packed SIMD extensions
//===----------------------------------------------------------------------===//
let TargetPrefix = "riscv" in {
-// Packed Averaging Addition and Subtraction.
+class RVPUnaryIntrinsic
+ : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>],
+ [IntrNoMem, IntrSpeculatable]>;
+
class RVPBinaryIntrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
[IntrNoMem, IntrSpeculatable]>;
+ // Packed Averaging Addition and Subtraction.
def int_riscv_paadd : RVPBinaryIntrinsic;
def int_riscv_paaddu : RVPBinaryIntrinsic;
def int_riscv_pasub : RVPBinaryIntrinsic;
def int_riscv_pasubu : RVPBinaryIntrinsic;
+
+ // Packed Absolute Value and Absolute Difference
+ def int_riscv_pabs : RVPUnaryIntrinsic;
+ def int_riscv_pabd : RVPBinaryIntrinsic;
+ def int_riscv_pabdu : RVPBinaryIntrinsic;
} // TargetPrefix = "riscv"
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 4a937fe2c6462..2afebfeb68bbe 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11736,10 +11736,14 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? ISD::CLMULH : ISD::CLMULR;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
+ case Intrinsic::riscv_pabs:
+ return DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op.getOperand(1));
case Intrinsic::riscv_paadd:
case Intrinsic::riscv_paaddu:
case Intrinsic::riscv_pasub:
- case Intrinsic::riscv_pasubu: {
+ case Intrinsic::riscv_pasubu:
+ case Intrinsic::riscv_pabd:
+ case Intrinsic::riscv_pabdu: {
unsigned Opc;
switch (IntNo) {
case Intrinsic::riscv_paadd:
@@ -11754,6 +11758,12 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
case Intrinsic::riscv_pasubu:
Opc = RISCVISD::ASUBU;
break;
+ case Intrinsic::riscv_pabd:
+ Opc = ISD::ABDS;
+ break;
+ case Intrinsic::riscv_pabdu:
+ Opc = ISD::ABDU;
+ break;
}
return DAG.getNode(Opc, DL, Op.getValueType(), Op.getOperand(1),
@@ -15663,10 +15673,26 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
+ case Intrinsic::riscv_pabs: {
+ EVT VT = N->getValueType(0);
+ if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16))
+ return;
+
+ EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16;
+ SDValue Undef = DAG.getUNDEF(VT);
+ SDValue Op0 =
+ DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(1), Undef);
+ SDValue Res = DAG.getNode(ISD::ABS, DL, WideVT, Op0);
+ Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
+ DAG.getVectorIdxConstant(0, DL)));
+ return;
+ }
case Intrinsic::riscv_paadd:
case Intrinsic::riscv_paaddu:
case Intrinsic::riscv_pasub:
- case Intrinsic::riscv_pasubu: {
+ case Intrinsic::riscv_pasubu:
+ case Intrinsic::riscv_pabd:
+ case Intrinsic::riscv_pabdu: {
EVT VT = N->getValueType(0);
if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16))
return;
@@ -15685,6 +15711,12 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
case Intrinsic::riscv_pasubu:
Opc = RISCVISD::ASUBU;
break;
+ case Intrinsic::riscv_pabd:
+ Opc = ISD::ABDS;
+ break;
+ case Intrinsic::riscv_pabdu:
+ Opc = ISD::ABDU;
+ break;
}
EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16;
diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
index cddea9ae32d25..2df6011878fc8 100644
--- a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
@@ -2758,3 +2758,57 @@ define <2 x i16> @test_pasubu_v2i16(<2 x i16> %a, <2 x i16> %b) {
%res = call <2 x i16> @llvm.riscv.pasubu.v2i16(<2 x i16> %a, <2 x i16> %b)
ret <2 x i16> %res
}
+
+define <4 x i8> @test_pabs_v4i8(<4 x i8> %a) {
+; CHECK-LABEL: test_pabs_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pabs.b a0, a0
+; CHECK-NEXT: ret
+ %res = call <4 x i8> @llvm.riscv.pabs.v4i8(<4 x i8> %a)
+ ret <4 x i8> %res
+}
+
+define <2 x i16> @test_pabs_v2i16(<2 x i16> %a) {
+; CHECK-LABEL: test_pabs_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pabs.h a0, a0
+; CHECK-NEXT: ret
+ %res = call <2 x i16> @llvm.riscv.pabs.v2i16(<2 x i16> %a)
+ ret <2 x i16> %res
+}
+
+define <4 x i8> @test_pabd_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_pabd_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pabd.b a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call <4 x i8> @llvm.riscv.pabd.v4i8(<4 x i8> %a, <4 x i8> %b)
+ ret <4 x i8> %res
+}
+
+define <2 x i16> @test_pabd_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_pabd_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pabd.h a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call <2 x i16> @llvm.riscv.pabd.v2i16(<2 x i16> %a, <2 x i16> %b)
+ ret <2 x i16> %res
+}
+
+define <4 x i8> @test_pabdu_v4i8(<4 x i8> %a, <4 x i8> %b) {
+; CHECK-LABEL: test_pabdu_v4i8:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pabdu.b a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call <4 x i8> @llvm.riscv.pabdu.v4i8(<4 x i8> %a, <4 x i8> %b)
+ ret <4 x i8> %res
+}
+
+define <2 x i16> @test_pabdu_v2i16(<2 x i16> %a, <2 x i16> %b) {
+; CHECK-LABEL: test_pabdu_v2i16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pabdu.h a0, a0, a1
+; CHECK-NEXT: ret
+ %res = call <2 x i16> @llvm.riscv.pabdu.v2i16(<2 x i16> %a, <2 x i16> %b)
+ ret <2 x i16> %res
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
index 2fcbc7bfec7f2..4cf6b6421f8b7 100644
--- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
@@ -4888,3 +4888,88 @@ define <2 x i32> @test_pasubu_v2i32(<2 x i32> %a, <2 x i32> %b) {
%res = call <2 x i32> @llvm.riscv.pasubu.v2i32(<2 x i32> %a, <2 x i32> %b)
ret <2 x i32> %res
}
+
+define <8 x i8> @test_pabs_v8i8(<8 x i8> %a) {
+; RV32-LABEL: test_pabs_v8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: pabs.db a0, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_pabs_v8i8:
+; RV64: # %bb.0:
+; RV64-NEXT: pabs.b a0, a0
+; RV64-NEXT: ret
+ %res = call <8 x i8> @llvm.riscv.pabs.v8i8(<8 x i8> %a)
+ ret <8 x i8> %res
+}
+
+define <4 x i16> @test_pabs_v4i16(<4 x i16> %a) {
+; RV32-LABEL: test_pabs_v4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: pabs.dh a0, a0
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_pabs_v4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: pabs.h a0, a0
+; RV64-NEXT: ret
+ %res = call <4 x i16> @llvm.riscv.pabs.v4i16(<4 x i16> %a)
+ ret <4 x i16> %res
+}
+
+; Packed Absolute Difference intrinsics (64-bit)
+define <8 x i8> @test_pabd_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_pabd_v8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: pabd.db a0, a0, a2
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_pabd_v8i8:
+; RV64: # %bb.0:
+; RV64-NEXT: pabd.b a0, a0, a1
+; RV64-NEXT: ret
+ %res = call <8 x i8> @llvm.riscv.pabd.v8i8(<8 x i8> %a, <8 x i8> %b)
+ ret <8 x i8> %res
+}
+
+define <4 x i16> @test_pabd_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_pabd_v4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: pabd.dh a0, a0, a2
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_pabd_v4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: pabd.h a0, a0, a1
+; RV64-NEXT: ret
+ %res = call <4 x i16> @llvm.riscv.pabd.v4i16(<4 x i16> %a, <4 x i16> %b)
+ ret <4 x i16> %res
+}
+
+define <8 x i8> @test_pabdu_v8i8(<8 x i8> %a, <8 x i8> %b) {
+; RV32-LABEL: test_pabdu_v8i8:
+; RV32: # %bb.0:
+; RV32-NEXT: pabdu.db a0, a0, a2
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_pabdu_v8i8:
+; RV64: # %bb.0:
+; RV64-NEXT: pabdu.b a0, a0, a1
+; RV64-NEXT: ret
+ %res = call <8 x i8> @llvm.riscv.pabdu.v8i8(<8 x i8> %a, <8 x i8> %b)
+ ret <8 x i8> %res
+}
+
+define <4 x i16> @test_pabdu_v4i16(<4 x i16> %a, <4 x i16> %b) {
+; RV32-LABEL: test_pabdu_v4i16:
+; RV32: # %bb.0:
+; RV32-NEXT: pabdu.dh a0, a0, a2
+; RV32-NEXT: ret
+;
+; RV64-LABEL: test_pabdu_v4i16:
+; RV64: # %bb.0:
+; RV64-NEXT: pabdu.h a0, a0, a1
+; RV64-NEXT: ret
+ %res = call <4 x i16> @llvm.riscv.pabdu.v4i16(<4 x i16> %a, <4 x i16> %b)
+ ret <4 x i16> %res
+}
>From 40f3b848e3a9a2c4423fbbec98e71ce49828a4cb Mon Sep 17 00:00:00 2001
From: Dongyan Chen <chendongyan at isrc.iscas.ac.cn>
Date: Tue, 16 Jun 2026 07:46:33 +0000
Subject: [PATCH 2/3] Fix code formatting of the packed builtin cast macros
---
clang/lib/Headers/riscv_packed_simd.h | 14 +++++++-------
1 file changed, 7 insertions(+), 7 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h
index b178c74e0635d..5539b9833bf6f 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -92,15 +92,15 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
return (rty)(__rs1 op __rs2); \
}
-#define __packed_unary_builtin_cast(name, ty, rty, builtin) \
- static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \
- return (rty)builtin(__rs1); \
+#define __packed_unary_builtin_cast(name, ty, rty, builtin) \
+ static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \
+ return (rty)builtin(__rs1); \
}
-#define __packed_binary_builtin_cast(name, ty, rty, builtin) \
- static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
- ty __rs2) { \
- return (rty)builtin(__rs1, __rs2); \
+#define __packed_binary_builtin_cast(name, ty, rty, builtin) \
+ static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
+ ty __rs2) { \
+ return (rty)builtin(__rs1, __rs2); \
}
// clang-format off: macro call sites have no trailing semicolons, which
>From 54afdb1f856cd10573012f50b4fb4654e2e98fb4 Mon Sep 17 00:00:00 2001
From: Dongyan Chen <chendongyan at isrc.iscas.ac.cn>
Date: Wed, 17 Jun 2026 04:28:38 +0000
Subject: [PATCH 3/3] Remove pabs intrinsic, use __builtin_elementwise_abs
instead
---
clang/include/clang/Basic/BuiltinsRISCV.td | 4 --
clang/lib/CodeGen/TargetBuiltins/RISCV.cpp | 10 -----
clang/lib/Headers/riscv_packed_simd.h | 14 +++---
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 48 ++++++++++-----------
llvm/include/llvm/IR/IntrinsicsRISCV.td | 6 ---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 16 -------
llvm/test/CodeGen/RISCV/rvp-simd-32.ll | 18 --------
llvm/test/CodeGen/RISCV/rvp-simd-64.ll | 29 -------------
8 files changed, 31 insertions(+), 114 deletions(-)
diff --git a/clang/include/clang/Basic/BuiltinsRISCV.td b/clang/include/clang/Basic/BuiltinsRISCV.td
index 4cea2612fffdd..3a1b54763bae6 100644
--- a/clang/include/clang/Basic/BuiltinsRISCV.td
+++ b/clang/include/clang/Basic/BuiltinsRISCV.td
@@ -182,16 +182,12 @@ def pasubu_u16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, unsigned
def pasubu_u32x2 : RISCVBuiltin<"_Vector<2, unsigned int>(_Vector<2, unsigned int>, _Vector<2, unsigned int>)">;
// Packed Absolute Value and Absolute Difference (32-bit)
-def pabs_i8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, signed char>)">;
-def pabs_i16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, short>)">;
def pabd_i8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, signed char>, _Vector<4, signed char>)">;
def pabd_i16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, short>, _Vector<2, short>)">;
def pabdu_u8x4 : RISCVBuiltin<"_Vector<4, unsigned char>(_Vector<4, unsigned char>, _Vector<4, unsigned char>)">;
def pabdu_u16x2 : RISCVBuiltin<"_Vector<2, unsigned short>(_Vector<2, unsigned short>, _Vector<2, unsigned short>)">;
// Packed Absolute Value and Absolute Difference (64-bit)
-def pabs_i8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, signed char>)">;
-def pabs_i16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, short>)">;
def pabd_i8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, signed char>, _Vector<8, signed char>)">;
def pabd_i16x4 : RISCVBuiltin<"_Vector<4, unsigned short>(_Vector<4, short>, _Vector<4, short>)">;
def pabdu_u8x8 : RISCVBuiltin<"_Vector<8, unsigned char>(_Vector<8, unsigned char>, _Vector<8, unsigned char>)">;
diff --git a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
index 44ec84f741b29..a1e9acb7ec2c8 100644
--- a/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
+++ b/clang/lib/CodeGen/TargetBuiltins/RISCV.cpp
@@ -1221,10 +1221,6 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
case RISCV::BI__builtin_riscv_pasubu_u16x4:
case RISCV::BI__builtin_riscv_pasubu_u32x2:
// Packed Absolute Value and Absolute Difference
- case RISCV::BI__builtin_riscv_pabs_i8x4:
- case RISCV::BI__builtin_riscv_pabs_i16x2:
- case RISCV::BI__builtin_riscv_pabs_i8x8:
- case RISCV::BI__builtin_riscv_pabs_i16x4:
case RISCV::BI__builtin_riscv_pabd_i8x4:
case RISCV::BI__builtin_riscv_pabd_i16x2:
case RISCV::BI__builtin_riscv_pabd_i8x8:
@@ -1264,12 +1260,6 @@ Value *CodeGenFunction::EmitRISCVBuiltinExpr(unsigned BuiltinID,
case RISCV::BI__builtin_riscv_pasubu_u32x2:
ID = Intrinsic::riscv_pasubu;
break;
- case RISCV::BI__builtin_riscv_pabs_i8x4:
- case RISCV::BI__builtin_riscv_pabs_i16x2:
- case RISCV::BI__builtin_riscv_pabs_i8x8:
- case RISCV::BI__builtin_riscv_pabs_i16x4:
- ID = Intrinsic::riscv_pabs;
- break;
case RISCV::BI__builtin_riscv_pabd_i8x4:
case RISCV::BI__builtin_riscv_pabd_i16x2:
case RISCV::BI__builtin_riscv_pabd_i8x8:
diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h
index 5539b9833bf6f..56f6b108d5f14 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -92,9 +92,9 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
return (rty)(__rs1 op __rs2); \
}
-#define __packed_unary_builtin_cast(name, ty, rty, builtin) \
+#define __packed_pabs(name, ty, rty) \
static __inline__ rty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \
- return (rty)builtin(__rs1); \
+ return (rty)__builtin_elementwise_abs(__rs1); \
}
#define __packed_binary_builtin_cast(name, ty, rty, builtin) \
@@ -387,16 +387,16 @@ __packed_binary_builtin(pasubu_u16x4, uint16x4_t, __builtin_riscv_pasubu_u16x4)
__packed_binary_builtin(pasubu_u32x2, uint32x2_t, __builtin_riscv_pasubu_u32x2)
/* Packed Absolute Value and Absolute Difference (32-bit) */
-__packed_unary_builtin_cast(pabs_i8x4, int8x4_t, uint8x4_t, __builtin_riscv_pabs_i8x4)
-__packed_unary_builtin_cast(pabs_i16x2, int16x2_t, uint16x2_t, __builtin_riscv_pabs_i16x2)
+__packed_pabs(pabs_i8x4, int8x4_t, uint8x4_t)
+__packed_pabs(pabs_i16x2, int16x2_t, uint16x2_t)
__packed_binary_builtin_cast(pabd_i8x4, int8x4_t, uint8x4_t, __builtin_riscv_pabd_i8x4)
__packed_binary_builtin_cast(pabd_i16x2, int16x2_t, uint16x2_t, __builtin_riscv_pabd_i16x2)
__packed_binary_builtin_cast(pabdu_u8x4, uint8x4_t, uint8x4_t, __builtin_riscv_pabdu_u8x4)
__packed_binary_builtin_cast(pabdu_u16x2, uint16x2_t, uint16x2_t, __builtin_riscv_pabdu_u16x2)
/* Packed Absolute Value and Absolute Difference (64-bit) */
-__packed_unary_builtin_cast(pabs_i8x8, int8x8_t, uint8x8_t, __builtin_riscv_pabs_i8x8)
-__packed_unary_builtin_cast(pabs_i16x4, int16x4_t, uint16x4_t, __builtin_riscv_pabs_i16x4)
+__packed_pabs(pabs_i8x8, int8x8_t, uint8x8_t)
+__packed_pabs(pabs_i16x4, int16x4_t, uint16x4_t)
__packed_binary_builtin_cast(pabd_i8x8, int8x8_t, uint8x8_t, __builtin_riscv_pabd_i8x8)
__packed_binary_builtin_cast(pabd_i16x4, int16x4_t, uint16x4_t, __builtin_riscv_pabd_i16x4)
__packed_binary_builtin_cast(pabdu_u8x8, uint8x8_t, uint8x8_t, __builtin_riscv_pabdu_u8x8)
@@ -419,7 +419,7 @@ __packed_binary_builtin_cast(pabdu_u16x4, uint16x4_t, uint16x4_t, __builtin_risc
#undef __packed_sh1add
#undef __packed_sh1sadd
#undef __packed_cmp
-#undef __packed_unary_builtin_cast
+#undef __packed_pabs
#undef __packed_binary_builtin_cast
#undef __DEFAULT_FN_ATTRS
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index f4e949084b9f9..cc388d0ab0328 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -5240,17 +5240,17 @@ uint32x2_t test_pasubu_u32x2(uint32x2_t rs1, uint32x2_t rs2) {
// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.riscv.pabs.v4i8(<4 x i8> [[TMP0]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[ELT_ABS_I:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[TMP0]], i1 false)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ELT_ABS_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
//
// RV64-LABEL: define dso_local i32 @test_pabs_i8x4(
// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = call <4 x i8> @llvm.riscv.pabs.v4i8(<4 x i8> [[TMP0]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[TMP1]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[ELT_ABS_I:%.*]] = call <4 x i8> @llvm.abs.v4i8(<4 x i8> [[TMP0]], i1 false)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ELT_ABS_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
//
uint8x4_t test_pabs_i8x4(int8x4_t rs1) {
return __riscv_pabs_i8x4(rs1);
@@ -5260,17 +5260,17 @@ uint8x4_t test_pabs_i8x4(int8x4_t rs1) {
// RV32-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.riscv.pabs.v2i16(<2 x i16> [[TMP0]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[ELT_ABS_I:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[TMP0]], i1 false)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[ELT_ABS_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
//
// RV64-LABEL: define dso_local i32 @test_pabs_i16x2(
// RV64-SAME: i32 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[RS1_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = call <2 x i16> @llvm.riscv.pabs.v2i16(<2 x i16> [[TMP0]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[TMP1]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[ELT_ABS_I:%.*]] = call <2 x i16> @llvm.abs.v2i16(<2 x i16> [[TMP0]], i1 false)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[ELT_ABS_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
//
uint16x2_t test_pabs_i16x2(int16x2_t rs1) {
return __riscv_pabs_i16x2(rs1);
@@ -5368,17 +5368,17 @@ uint16x2_t test_pabdu_u16x2(uint16x2_t rs1, uint16x2_t rs2) {
// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.riscv.pabs.v8i8(<8 x i8> [[TMP0]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[ELT_ABS_I:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP0]], i1 false)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[ELT_ABS_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
// RV64-LABEL: define dso_local i64 @test_pabs_i8x8(
// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = call <8 x i8> @llvm.riscv.pabs.v8i8(<8 x i8> [[TMP0]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[TMP1]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[ELT_ABS_I:%.*]] = call <8 x i8> @llvm.abs.v8i8(<8 x i8> [[TMP0]], i1 false)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[ELT_ABS_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
uint8x8_t test_pabs_i8x8(int8x8_t rs1) {
return __riscv_pabs_i8x8(rs1);
@@ -5388,17 +5388,17 @@ uint8x8_t test_pabs_i8x8(int8x8_t rs1) {
// RV32-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.riscv.pabs.v4i16(<4 x i16> [[TMP0]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[ELT_ABS_I:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[TMP0]], i1 false)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[ELT_ABS_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
// RV64-LABEL: define dso_local i64 @test_pabs_i16x4(
// RV64-SAME: i64 noundef [[RS1_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[RS1_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = call <4 x i16> @llvm.riscv.pabs.v4i16(<4 x i16> [[TMP0]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[TMP1]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[ELT_ABS_I:%.*]] = call <4 x i16> @llvm.abs.v4i16(<4 x i16> [[TMP0]], i1 false)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[ELT_ABS_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
uint16x4_t test_pabs_i16x4(int16x4_t rs1) {
return __riscv_pabs_i16x4(rs1);
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index aa8a20cf52b62..a82b17591f780 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -2046,11 +2046,6 @@ def int_riscv_pause : DefaultAttrsIntrinsic<[], [], [IntrNoMem, IntrHasSideEffec
// Packed SIMD extensions
//===----------------------------------------------------------------------===//
let TargetPrefix = "riscv" in {
-class RVPUnaryIntrinsic
- : DefaultAttrsIntrinsic<[llvm_anyvector_ty],
- [LLVMMatchType<0>],
- [IntrNoMem, IntrSpeculatable]>;
-
class RVPBinaryIntrinsic
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>],
@@ -2063,7 +2058,6 @@ class RVPBinaryIntrinsic
def int_riscv_pasubu : RVPBinaryIntrinsic;
// Packed Absolute Value and Absolute Difference
- def int_riscv_pabs : RVPUnaryIntrinsic;
def int_riscv_pabd : RVPBinaryIntrinsic;
def int_riscv_pabdu : RVPBinaryIntrinsic;
} // TargetPrefix = "riscv"
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 2afebfeb68bbe..85385955a4716 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -11736,8 +11736,6 @@ SDValue RISCVTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
unsigned Opc = IntNo == Intrinsic::riscv_clmulh ? ISD::CLMULH : ISD::CLMULR;
return DAG.getNode(Opc, DL, XLenVT, Op.getOperand(1), Op.getOperand(2));
}
- case Intrinsic::riscv_pabs:
- return DAG.getNode(ISD::ABS, DL, Op.getValueType(), Op.getOperand(1));
case Intrinsic::riscv_paadd:
case Intrinsic::riscv_paaddu:
case Intrinsic::riscv_pasub:
@@ -15673,20 +15671,6 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, MVT::i32, Res));
return;
}
- case Intrinsic::riscv_pabs: {
- EVT VT = N->getValueType(0);
- if (!Subtarget.is64Bit() || (VT != MVT::v4i8 && VT != MVT::v2i16))
- return;
-
- EVT WideVT = VT == MVT::v4i8 ? MVT::v8i8 : MVT::v4i16;
- SDValue Undef = DAG.getUNDEF(VT);
- SDValue Op0 =
- DAG.getNode(ISD::CONCAT_VECTORS, DL, WideVT, N->getOperand(1), Undef);
- SDValue Res = DAG.getNode(ISD::ABS, DL, WideVT, Op0);
- Results.push_back(DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, Res,
- DAG.getVectorIdxConstant(0, DL)));
- return;
- }
case Intrinsic::riscv_paadd:
case Intrinsic::riscv_paaddu:
case Intrinsic::riscv_pasub:
diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
index 2df6011878fc8..ce5945aa03547 100644
--- a/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-simd-32.ll
@@ -2759,24 +2759,6 @@ define <2 x i16> @test_pasubu_v2i16(<2 x i16> %a, <2 x i16> %b) {
ret <2 x i16> %res
}
-define <4 x i8> @test_pabs_v4i8(<4 x i8> %a) {
-; CHECK-LABEL: test_pabs_v4i8:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pabs.b a0, a0
-; CHECK-NEXT: ret
- %res = call <4 x i8> @llvm.riscv.pabs.v4i8(<4 x i8> %a)
- ret <4 x i8> %res
-}
-
-define <2 x i16> @test_pabs_v2i16(<2 x i16> %a) {
-; CHECK-LABEL: test_pabs_v2i16:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pabs.h a0, a0
-; CHECK-NEXT: ret
- %res = call <2 x i16> @llvm.riscv.pabs.v2i16(<2 x i16> %a)
- ret <2 x i16> %res
-}
-
define <4 x i8> @test_pabd_v4i8(<4 x i8> %a, <4 x i8> %b) {
; CHECK-LABEL: test_pabd_v4i8:
; CHECK: # %bb.0:
diff --git a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
index 4cf6b6421f8b7..13ff4b0e6202e 100644
--- a/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-simd-64.ll
@@ -4889,35 +4889,6 @@ define <2 x i32> @test_pasubu_v2i32(<2 x i32> %a, <2 x i32> %b) {
ret <2 x i32> %res
}
-define <8 x i8> @test_pabs_v8i8(<8 x i8> %a) {
-; RV32-LABEL: test_pabs_v8i8:
-; RV32: # %bb.0:
-; RV32-NEXT: pabs.db a0, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: test_pabs_v8i8:
-; RV64: # %bb.0:
-; RV64-NEXT: pabs.b a0, a0
-; RV64-NEXT: ret
- %res = call <8 x i8> @llvm.riscv.pabs.v8i8(<8 x i8> %a)
- ret <8 x i8> %res
-}
-
-define <4 x i16> @test_pabs_v4i16(<4 x i16> %a) {
-; RV32-LABEL: test_pabs_v4i16:
-; RV32: # %bb.0:
-; RV32-NEXT: pabs.dh a0, a0
-; RV32-NEXT: ret
-;
-; RV64-LABEL: test_pabs_v4i16:
-; RV64: # %bb.0:
-; RV64-NEXT: pabs.h a0, a0
-; RV64-NEXT: ret
- %res = call <4 x i16> @llvm.riscv.pabs.v4i16(<4 x i16> %a)
- ret <4 x i16> %res
-}
-
-; Packed Absolute Difference intrinsics (64-bit)
define <8 x i8> @test_pabd_v8i8(<8 x i8> %a, <8 x i8> %b) {
; RV32-LABEL: test_pabd_v8i8:
; RV32: # %bb.0:
More information about the cfe-commits mailing list