[clang] [llvm] [RISCV] Add riscv_packed_simd.h for P extension intrinsics (PR #181115)
via cfe-commits
cfe-commits at lists.llvm.org
Mon Jun 8 18:01:11 PDT 2026
https://github.com/sihuan updated https://github.com/llvm/llvm-project/pull/181115
>From 846f9ab5d88ecf42b75170b19b938eab059dede4 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 30 Jan 2026 00:41:13 +0800
Subject: [PATCH 01/19] [Clang][RISCV] Add riscv_simd.h for P extension
intrinsics
This patch adds `riscv_simd.h`, introducing initial support for RISC-V P extension intrinsics.
The supported operations include:
- Packed addition and subtraction (padd, psub)
- Packed logical and arithmetic shifts (psll, psrl, psra)
These intrinsics are implemented with standard C vector operators so that Clang emits canonical LLVM IR (e.g., `add <4 x i8>`, `shl <2 x i16>`). The RISC-V backend is then responsible for lowering that IR to the corresponding P extension instructions.
---
clang/lib/Headers/CMakeLists.txt | 1 +
clang/lib/Headers/riscv_simd.h | 245 +++++
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 1009 +++++++++++++++++++++
3 files changed, 1255 insertions(+)
create mode 100644 clang/lib/Headers/riscv_simd.h
create mode 100644 clang/test/CodeGen/RISCV/rvp-intrinsics.c
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index ce34f8b9410a7..968e6234c0949 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -140,6 +140,7 @@ set(riscv_files
riscv_corev_alu.h
riscv_mips.h
riscv_nds.h
+ riscv_simd.h
sifive_vector.h
)
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
new file mode 100644
index 0000000000000..262f35b483cbd
--- /dev/null
+++ b/clang/lib/Headers/riscv_simd.h
@@ -0,0 +1,245 @@
+/*===---- riscv_simd.h - RISC-V P intrinsics -----------------===
+ *
+ * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+ * See https://llvm.org/LICENSE.txt for license information.
+ * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+ *
+ *===-----------------------------------------------------------------------===
+ */
+
+#ifndef __RISCV_SIMD_H
+#define __RISCV_SIMD_H
+
+#include <stdint.h>
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/* Packed SIMD Types */
+
+typedef int8_t int8x4_t __attribute__((vector_size(4)));
+typedef uint8_t uint8x4_t __attribute__((vector_size(4)));
+typedef int16_t int16x2_t __attribute__((vector_size(4)));
+typedef uint16_t uint16x2_t __attribute__((vector_size(4)));
+
+typedef int8_t int8x8_t __attribute__((vector_size(8)));
+typedef uint8_t uint8x8_t __attribute__((vector_size(8)));
+typedef int16_t int16x4_t __attribute__((vector_size(8)));
+typedef uint16_t uint16x4_t __attribute__((vector_size(8)));
+typedef int32_t int32x2_t __attribute__((vector_size(8)));
+typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
+
+/* Packed Addition and Subtraction (32-bit) */
+
+static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_i8x4(int8x4_t __rs1, int8x4_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_u8x4(uint8x4_t __rs1, uint8x4_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_i16x2(int16x2_t __rs1, int16x2_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_u16x2(uint16x2_t __rs1, uint16x2_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_i8x4(int8x4_t __rs1, int8x4_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_u8x4(uint8x4_t __rs1, uint8x4_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_i16x2(int16x2_t __rs1, int16x2_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_u16x2(uint16x2_t __rs1, uint16x2_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+/* Packed Addition and Subtraction (64-bit) */
+
+static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_i8x8(int8x8_t __rs1, int8x8_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_u8x8(uint8x8_t __rs1, uint8x8_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_i16x4(int16x4_t __rs1, int16x4_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_u16x4(uint16x4_t __rs1, uint16x4_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_i32x2(int32x2_t __rs1, int32x2_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_padd_u32x2(uint32x2_t __rs1, uint32x2_t __rs2) {
+ return __rs1 + __rs2;
+}
+
+static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_i8x8(int8x8_t __rs1, int8x8_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_u8x8(uint8x8_t __rs1, uint8x8_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_i16x4(int16x4_t __rs1, int16x4_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_u16x4(uint16x4_t __rs1, uint16x4_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_i32x2(int32x2_t __rs1, int32x2_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psub_u32x2(uint32x2_t __rs1, uint32x2_t __rs2) {
+ return __rs1 - __rs2;
+}
+
+/* Packed Shifts (32-bit) */
+
+static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_u8x4(uint8x4_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_i8x4(int8x4_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_u16x2(uint16x2_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_i16x2(int16x2_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_s_u8x4(uint8x4_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_s_u16x2(uint16x2_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_s_i8x4(int8x4_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_s_i16x2(int16x2_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+/* Packed Shifts (64-bit) */
+
+static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_u8x8(uint8x8_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_i8x8(int8x8_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_u16x4(uint16x4_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_i16x4(int16x4_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_u32x2(uint32x2_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psll_s_i32x2(int32x2_t __rs1, unsigned __shamt) {
+ return __rs1 << __shamt;
+}
+
+static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_s_u8x8(uint8x8_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_s_u16x4(uint16x4_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psrl_s_u32x2(uint32x2_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_s_i8x8(int8x8_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_s_i16x4(int16x4_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__))
+__riscv_psra_s_i32x2(int32x2_t __rs1, unsigned __shamt) {
+ return __rs1 >> __shamt;
+}
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif /* __RISCV_SIMD_H */
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
new file mode 100644
index 0000000000000..40a21fa071387
--- /dev/null
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -0,0 +1,1009 @@
+// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 6
+// RUN: %clang_cc1 -triple riscv32 -target-feature +experimental-p \
+// RUN: -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -passes=sroa,instcombine | FileCheck %s --check-prefix=RV32
+// RUN: %clang_cc1 -triple riscv64 -target-feature +experimental-p \
+// RUN: -disable-O0-optnone -emit-llvm -o - %s \
+// RUN: | opt -S -passes=sroa,instcombine | FileCheck %s --check-prefix=RV64
+
+#include <riscv_simd.h>
+
+/* 32-bit Packed Addition and Subtraction */
+
+// RV32-LABEL: define dso_local i32 @test_padd_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_i8x4(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <4 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_padd_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_u8x4(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <4 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_padd_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_i16x2(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <2 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_padd_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_u16x2(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <2 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_padd_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_i8x4(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <4 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_psub_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_u8x4(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <4 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_psub_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_i16x2(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <2 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psub_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_u16x2(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <2 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+uint16x2_t test_psub_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psub_u16x2(a, b);
+}
+
+/* 64-bit Packed Addition and Subtraction */
+
+// RV32-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int8x8_t test_padd_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_padd_i8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint8x8_t test_padd_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_padd_u8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_padd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_padd_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_padd_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_padd_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int32x2_t test_padd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_padd_i32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint32x2_t test_padd_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_padd_u32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int8x8_t test_psub_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_psub_i8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint8x8_t test_psub_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_psub_u8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_psub_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psub_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_psub_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psub_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int32x2_t test_psub_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psub_i32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psub_u32x2(a, b);
+}
+
+/* 32-bit Packed Shifts */
+
+// RV32-LABEL: define dso_local i32 @test_psll_s_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_i8x4(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
+ return __riscv_psll_s_i8x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psll_s_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_u8x4(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
+ return __riscv_psll_s_u8x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psll_s_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_i16x2(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
+ return __riscv_psll_s_i16x2(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psll_s_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_u16x2(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
+ return __riscv_psll_s_u16x2(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psra_s_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psra_s_i8x4(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
+ return __riscv_psra_s_i8x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psrl_s_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x4(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
+ return __riscv_psrl_s_u8x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psra_s_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psra_s_i16x2(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
+ return __riscv_psra_s_i16x2(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psrl_s_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x2(
+// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+//
+uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned shamt) {
+ return __riscv_psrl_s_u16x2(a, shamt);
+}
+
+/* 64-bit Packed Shifts */
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned shamt) {
+ return __riscv_psll_s_i8x8(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned shamt) {
+ return __riscv_psll_s_u8x8(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned shamt) {
+ return __riscv_psll_s_i16x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned shamt) {
+ return __riscv_psll_s_u16x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned shamt) {
+ return __riscv_psll_s_i32x2(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned shamt) {
+ return __riscv_psll_s_u32x2(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psra_s_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psra_s_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned shamt) {
+ return __riscv_psra_s_i8x8(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned shamt) {
+ return __riscv_psrl_s_u8x8(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psra_s_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psra_s_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned shamt) {
+ return __riscv_psra_s_i16x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP3]]
+//
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP3]]
+//
+uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned shamt) {
+ return __riscv_psrl_s_u16x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psra_s_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_psra_s_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned shamt) {
+ return __riscv_psra_s_i32x2(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+uint32x2_t test_psrl_s_u32x2(uint32x2_t a, unsigned shamt) {
+ return __riscv_psrl_s_u32x2(a, shamt);
+}
>From f7679a66188a463861294d319f230876af2df2d5 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 13 Feb 2026 10:14:32 +0800
Subject: [PATCH 02/19] Fix list alphabetization and line alignment
---
clang/lib/Headers/riscv_simd.h | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_simd.h
index 262f35b483cbd..21d4d01628562 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_simd.h
@@ -1,4 +1,4 @@
-/*===---- riscv_simd.h - RISC-V P intrinsics -----------------===
+/*===---- riscv_simd.h - RISC-V P intrinsics -------------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
>From 4b132cabcf5c9e62dc7fa6a4b4830ae1c6c92bb2 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 24 Feb 2026 14:24:08 +0800
Subject: [PATCH 03/19] Rename P-extension header to riscv_packed.h
---
clang/lib/Headers/CMakeLists.txt | 2 +-
clang/lib/Headers/{riscv_simd.h => riscv_packed.h} | 8 ++++----
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 2 +-
3 files changed, 6 insertions(+), 6 deletions(-)
rename clang/lib/Headers/{riscv_simd.h => riscv_packed.h} (98%)
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 968e6234c0949..59cd039747ae6 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -140,7 +140,7 @@ set(riscv_files
riscv_corev_alu.h
riscv_mips.h
riscv_nds.h
- riscv_simd.h
+ riscv_packed.h
sifive_vector.h
)
diff --git a/clang/lib/Headers/riscv_simd.h b/clang/lib/Headers/riscv_packed.h
similarity index 98%
rename from clang/lib/Headers/riscv_simd.h
rename to clang/lib/Headers/riscv_packed.h
index 21d4d01628562..ef2d7b878c2eb 100644
--- a/clang/lib/Headers/riscv_simd.h
+++ b/clang/lib/Headers/riscv_packed.h
@@ -1,4 +1,4 @@
-/*===---- riscv_simd.h - RISC-V P intrinsics -------------------------------===
+/*===---- riscv_packed.h - RISC-V P intrinsics -----------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
@@ -7,8 +7,8 @@
*===-----------------------------------------------------------------------===
*/
-#ifndef __RISCV_SIMD_H
-#define __RISCV_SIMD_H
+#ifndef __RISCV_PACKED_H
+#define __RISCV_PACKED_H
#include <stdint.h>
@@ -242,4 +242,4 @@ __riscv_psra_s_i32x2(int32x2_t __rs1, unsigned __shamt) {
}
#endif
-#endif /* __RISCV_SIMD_H */
+#endif /* __RISCV_PACKED_H */
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index 40a21fa071387..c80a6ad4e95e7 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -6,7 +6,7 @@
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=sroa,instcombine | FileCheck %s --check-prefix=RV64
-#include <riscv_simd.h>
+#include <riscv_packed.h>
/* 32-bit Packed Addition and Subtraction */
>From f01e439ff7ec63e985adb9c23dded6a0bce55909 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Wed, 25 Feb 2026 17:26:03 +0800
Subject: [PATCH 04/19] [RISCV] Refactor P-extension intrinsics with macros
Co-authored-by: Alexander Richardson <alexrichardson at google.com>
---
clang/lib/Headers/riscv_packed.h | 251 +++++++------------------------
1 file changed, 52 insertions(+), 199 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed.h b/clang/lib/Headers/riscv_packed.h
index ef2d7b878c2eb..07822f257630e 100644
--- a/clang/lib/Headers/riscv_packed.h
+++ b/clang/lib/Headers/riscv_packed.h
@@ -30,213 +30,66 @@ typedef uint16_t uint16x4_t __attribute__((vector_size(8)));
typedef int32_t int32x2_t __attribute__((vector_size(8)));
typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
-/* Packed Addition and Subtraction (32-bit) */
-
-static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_i8x4(int8x4_t __rs1, int8x4_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_u8x4(uint8x4_t __rs1, uint8x4_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_i16x2(int16x2_t __rs1, int16x2_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_u16x2(uint16x2_t __rs1, uint16x2_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_i8x4(int8x4_t __rs1, int8x4_t __rs2) {
- return __rs1 - __rs2;
-}
+#define _packed_binop(name, retty, ty1, ty2, op) \
+ static __inline__ retty __attribute__((__always_inline__, __nodebug__)) \
+ __riscv_##name(ty1 __rs1, ty2 __rs2) { \
+ return __rs1 op __rs2; \
+ }
-static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_u8x4(uint8x4_t __rs1, uint8x4_t __rs2) {
- return __rs1 - __rs2;
-}
+#define _packed_addsub(name, ty, op) _packed_binop(name, ty, ty, ty, op)
+#define _packed_shift(name, ty, op) _packed_binop(name, ty, ty, unsigned, op)
-static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_i16x2(int16x2_t __rs1, int16x2_t __rs2) {
- return __rs1 - __rs2;
-}
-
-static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_u16x2(uint16x2_t __rs1, uint16x2_t __rs2) {
- return __rs1 - __rs2;
-}
+/* Packed Addition and Subtraction (32-bit) */
+_packed_addsub(padd_i8x4, int8x4_t, +)
+_packed_addsub(padd_u8x4, uint8x4_t, +)
+_packed_addsub(padd_i16x2, int16x2_t, +)
+_packed_addsub(padd_u16x2, uint16x2_t, +)
+_packed_addsub(psub_i8x4, int8x4_t, -)
+_packed_addsub(psub_u8x4, uint8x4_t, -)
+_packed_addsub(psub_i16x2, int16x2_t, -)
+_packed_addsub(psub_u16x2, uint16x2_t, -)
/* Packed Addition and Subtraction (64-bit) */
-
-static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_i8x8(int8x8_t __rs1, int8x8_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_u8x8(uint8x8_t __rs1, uint8x8_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_i16x4(int16x4_t __rs1, int16x4_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_u16x4(uint16x4_t __rs1, uint16x4_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_i32x2(int32x2_t __rs1, int32x2_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_padd_u32x2(uint32x2_t __rs1, uint32x2_t __rs2) {
- return __rs1 + __rs2;
-}
-
-static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_i8x8(int8x8_t __rs1, int8x8_t __rs2) {
- return __rs1 - __rs2;
-}
-
-static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_u8x8(uint8x8_t __rs1, uint8x8_t __rs2) {
- return __rs1 - __rs2;
-}
-
-static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_i16x4(int16x4_t __rs1, int16x4_t __rs2) {
- return __rs1 - __rs2;
-}
-
-static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_u16x4(uint16x4_t __rs1, uint16x4_t __rs2) {
- return __rs1 - __rs2;
-}
-
-static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_i32x2(int32x2_t __rs1, int32x2_t __rs2) {
- return __rs1 - __rs2;
-}
-
-static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psub_u32x2(uint32x2_t __rs1, uint32x2_t __rs2) {
- return __rs1 - __rs2;
-}
+_packed_addsub(padd_i8x8, int8x8_t, +)
+_packed_addsub(padd_u8x8, uint8x8_t, +)
+_packed_addsub(padd_i16x4, int16x4_t, +)
+_packed_addsub(padd_u16x4, uint16x4_t, +)
+_packed_addsub(padd_i32x2, int32x2_t, +)
+_packed_addsub(padd_u32x2, uint32x2_t, +)
+_packed_addsub(psub_i8x8, int8x8_t, -)
+_packed_addsub(psub_u8x8, uint8x8_t, -)
+_packed_addsub(psub_i16x4, int16x4_t, -)
+_packed_addsub(psub_u16x4, uint16x4_t, -)
+_packed_addsub(psub_i32x2, int32x2_t, -)
+_packed_addsub(psub_u32x2, uint32x2_t, -)
/* Packed Shifts (32-bit) */
-
-static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_u8x4(uint8x4_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_i8x4(int8x4_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_u16x2(uint16x2_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_i16x2(int16x2_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ uint8x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psrl_s_u8x4(uint8x4_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
-
-static __inline__ uint16x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psrl_s_u16x2(uint16x2_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
-
-static __inline__ int8x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psra_s_i8x4(int8x4_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
-
-static __inline__ int16x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psra_s_i16x2(int16x2_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
+_packed_shift(psll_s_u8x4, uint8x4_t, <<)
+_packed_shift(psll_s_i8x4, int8x4_t, <<)
+_packed_shift(psll_s_u16x2, uint16x2_t, <<)
+_packed_shift(psll_s_i16x2, int16x2_t, <<)
+_packed_shift(psrl_s_u8x4, uint8x4_t, >>)
+_packed_shift(psrl_s_u16x2, uint16x2_t, >>)
+_packed_shift(psra_s_i8x4, int8x4_t, >>)
+_packed_shift(psra_s_i16x2, int16x2_t, >>)
/* Packed Shifts (64-bit) */
-
-static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_u8x8(uint8x8_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_i8x8(int8x8_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_u16x4(uint16x4_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_i16x4(int16x4_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_u32x2(uint32x2_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psll_s_i32x2(int32x2_t __rs1, unsigned __shamt) {
- return __rs1 << __shamt;
-}
-
-static __inline__ uint8x8_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psrl_s_u8x8(uint8x8_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
-
-static __inline__ uint16x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psrl_s_u16x4(uint16x4_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
-
-static __inline__ uint32x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psrl_s_u32x2(uint32x2_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
-
-static __inline__ int8x8_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psra_s_i8x8(int8x8_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
-
-static __inline__ int16x4_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psra_s_i16x4(int16x4_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
-
-static __inline__ int32x2_t __attribute__((__always_inline__, __nodebug__))
-__riscv_psra_s_i32x2(int32x2_t __rs1, unsigned __shamt) {
- return __rs1 >> __shamt;
-}
+_packed_shift(psll_s_u8x8, uint8x8_t, <<)
+_packed_shift(psll_s_i8x8, int8x8_t, <<)
+_packed_shift(psll_s_u16x4, uint16x4_t, <<)
+_packed_shift(psll_s_i16x4, int16x4_t, <<)
+_packed_shift(psll_s_u32x2, uint32x2_t, <<)
+_packed_shift(psll_s_i32x2, int32x2_t, <<)
+_packed_shift(psrl_s_u8x8, uint8x8_t, >>)
+_packed_shift(psrl_s_u16x4, uint16x4_t, >>)
+_packed_shift(psrl_s_u32x2, uint32x2_t, >>)
+_packed_shift(psra_s_i8x8, int8x8_t, >>)
+_packed_shift(psra_s_i16x4, int16x4_t, >>)
+_packed_shift(psra_s_i32x2, int32x2_t, >>)
+
+#undef _packed_addsub
+#undef _packed_shift
+#undef _packed_binop
#if defined(__cplusplus)
}
>From ff65675efbb4d6124e6cc9f398e1006f40869741 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Mon, 2 Mar 2026 14:05:56 +0800
Subject: [PATCH 05/19] [RISCV] Standardize P-extension intrinsics macros and
types
---
clang/lib/Headers/riscv_packed.h | 112 +++++++++++++++----------------
1 file changed, 56 insertions(+), 56 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed.h b/clang/lib/Headers/riscv_packed.h
index 07822f257630e..b201c1f1d3f0b 100644
--- a/clang/lib/Headers/riscv_packed.h
+++ b/clang/lib/Headers/riscv_packed.h
@@ -18,78 +18,78 @@ extern "C" {
/* Packed SIMD Types */
-typedef int8_t int8x4_t __attribute__((vector_size(4)));
-typedef uint8_t uint8x4_t __attribute__((vector_size(4)));
-typedef int16_t int16x2_t __attribute__((vector_size(4)));
-typedef uint16_t uint16x2_t __attribute__((vector_size(4)));
+typedef int8_t int8x4_t __attribute__((__vector_size__(4), __aligned__(4)));
+typedef uint8_t uint8x4_t __attribute__((__vector_size__(4), __aligned__(4)));
+typedef int16_t int16x2_t __attribute__((__vector_size__(4), __aligned__(4)));
+typedef uint16_t uint16x2_t __attribute__((__vector_size__(4), __aligned__(4)));
-typedef int8_t int8x8_t __attribute__((vector_size(8)));
-typedef uint8_t uint8x8_t __attribute__((vector_size(8)));
-typedef int16_t int16x4_t __attribute__((vector_size(8)));
-typedef uint16_t uint16x4_t __attribute__((vector_size(8)));
-typedef int32_t int32x2_t __attribute__((vector_size(8)));
-typedef uint32_t uint32x2_t __attribute__((vector_size(8)));
+typedef int8_t int8x8_t __attribute__((__vector_size__(8), __aligned__(8)));
+typedef uint8_t uint8x8_t __attribute__((__vector_size__(8), __aligned__(8)));
+typedef int16_t int16x4_t __attribute__((__vector_size__(8), __aligned__(8)));
+typedef uint16_t uint16x4_t __attribute__((__vector_size__(8), __aligned__(8)));
+typedef int32_t int32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
+typedef uint32_t uint32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
-#define _packed_binop(name, retty, ty1, ty2, op) \
+#define __packed_binop(name, retty, ty1, ty2, op) \
static __inline__ retty __attribute__((__always_inline__, __nodebug__)) \
__riscv_##name(ty1 __rs1, ty2 __rs2) { \
return __rs1 op __rs2; \
}
-#define _packed_addsub(name, ty, op) _packed_binop(name, ty, ty, ty, op)
-#define _packed_shift(name, ty, op) _packed_binop(name, ty, ty, unsigned, op)
+#define __packed_addsub(name, ty, op) __packed_binop(name, ty, ty, ty, op)
+#define __packed_shift(name, ty, op) __packed_binop(name, ty, ty, unsigned, op)
/* Packed Addition and Subtraction (32-bit) */
-_packed_addsub(padd_i8x4, int8x4_t, +)
-_packed_addsub(padd_u8x4, uint8x4_t, +)
-_packed_addsub(padd_i16x2, int16x2_t, +)
-_packed_addsub(padd_u16x2, uint16x2_t, +)
-_packed_addsub(psub_i8x4, int8x4_t, -)
-_packed_addsub(psub_u8x4, uint8x4_t, -)
-_packed_addsub(psub_i16x2, int16x2_t, -)
-_packed_addsub(psub_u16x2, uint16x2_t, -)
+__packed_addsub(padd_i8x4, int8x4_t, +)
+__packed_addsub(padd_u8x4, uint8x4_t, +)
+__packed_addsub(padd_i16x2, int16x2_t, +)
+__packed_addsub(padd_u16x2, uint16x2_t, +)
+__packed_addsub(psub_i8x4, int8x4_t, -)
+__packed_addsub(psub_u8x4, uint8x4_t, -)
+__packed_addsub(psub_i16x2, int16x2_t, -)
+__packed_addsub(psub_u16x2, uint16x2_t, -)
/* Packed Addition and Subtraction (64-bit) */
-_packed_addsub(padd_i8x8, int8x8_t, +)
-_packed_addsub(padd_u8x8, uint8x8_t, +)
-_packed_addsub(padd_i16x4, int16x4_t, +)
-_packed_addsub(padd_u16x4, uint16x4_t, +)
-_packed_addsub(padd_i32x2, int32x2_t, +)
-_packed_addsub(padd_u32x2, uint32x2_t, +)
-_packed_addsub(psub_i8x8, int8x8_t, -)
-_packed_addsub(psub_u8x8, uint8x8_t, -)
-_packed_addsub(psub_i16x4, int16x4_t, -)
-_packed_addsub(psub_u16x4, uint16x4_t, -)
-_packed_addsub(psub_i32x2, int32x2_t, -)
-_packed_addsub(psub_u32x2, uint32x2_t, -)
+__packed_addsub(padd_i8x8, int8x8_t, +)
+__packed_addsub(padd_u8x8, uint8x8_t, +)
+__packed_addsub(padd_i16x4, int16x4_t, +)
+__packed_addsub(padd_u16x4, uint16x4_t, +)
+__packed_addsub(padd_i32x2, int32x2_t, +)
+__packed_addsub(padd_u32x2, uint32x2_t, +)
+__packed_addsub(psub_i8x8, int8x8_t, -)
+__packed_addsub(psub_u8x8, uint8x8_t, -)
+__packed_addsub(psub_i16x4, int16x4_t, -)
+__packed_addsub(psub_u16x4, uint16x4_t, -)
+__packed_addsub(psub_i32x2, int32x2_t, -)
+__packed_addsub(psub_u32x2, uint32x2_t, -)
/* Packed Shifts (32-bit) */
-_packed_shift(psll_s_u8x4, uint8x4_t, <<)
-_packed_shift(psll_s_i8x4, int8x4_t, <<)
-_packed_shift(psll_s_u16x2, uint16x2_t, <<)
-_packed_shift(psll_s_i16x2, int16x2_t, <<)
-_packed_shift(psrl_s_u8x4, uint8x4_t, >>)
-_packed_shift(psrl_s_u16x2, uint16x2_t, >>)
-_packed_shift(psra_s_i8x4, int8x4_t, >>)
-_packed_shift(psra_s_i16x2, int16x2_t, >>)
+__packed_shift(psll_s_u8x4, uint8x4_t, <<)
+__packed_shift(psll_s_i8x4, int8x4_t, <<)
+__packed_shift(psll_s_u16x2, uint16x2_t, <<)
+__packed_shift(psll_s_i16x2, int16x2_t, <<)
+__packed_shift(psrl_s_u8x4, uint8x4_t, >>)
+__packed_shift(psrl_s_u16x2, uint16x2_t, >>)
+__packed_shift(psra_s_i8x4, int8x4_t, >>)
+__packed_shift(psra_s_i16x2, int16x2_t, >>)
/* Packed Shifts (64-bit) */
-_packed_shift(psll_s_u8x8, uint8x8_t, <<)
-_packed_shift(psll_s_i8x8, int8x8_t, <<)
-_packed_shift(psll_s_u16x4, uint16x4_t, <<)
-_packed_shift(psll_s_i16x4, int16x4_t, <<)
-_packed_shift(psll_s_u32x2, uint32x2_t, <<)
-_packed_shift(psll_s_i32x2, int32x2_t, <<)
-_packed_shift(psrl_s_u8x8, uint8x8_t, >>)
-_packed_shift(psrl_s_u16x4, uint16x4_t, >>)
-_packed_shift(psrl_s_u32x2, uint32x2_t, >>)
-_packed_shift(psra_s_i8x8, int8x8_t, >>)
-_packed_shift(psra_s_i16x4, int16x4_t, >>)
-_packed_shift(psra_s_i32x2, int32x2_t, >>)
+__packed_shift(psll_s_u8x8, uint8x8_t, <<)
+__packed_shift(psll_s_i8x8, int8x8_t, <<)
+__packed_shift(psll_s_u16x4, uint16x4_t, <<)
+__packed_shift(psll_s_i16x4, int16x4_t, <<)
+__packed_shift(psll_s_u32x2, uint32x2_t, <<)
+__packed_shift(psll_s_i32x2, int32x2_t, <<)
+__packed_shift(psrl_s_u8x8, uint8x8_t, >>)
+__packed_shift(psrl_s_u16x4, uint16x4_t, >>)
+__packed_shift(psrl_s_u32x2, uint32x2_t, >>)
+__packed_shift(psra_s_i8x8, int8x8_t, >>)
+__packed_shift(psra_s_i16x4, int16x4_t, >>)
+__packed_shift(psra_s_i32x2, int32x2_t, >>)
-#undef _packed_addsub
-#undef _packed_shift
-#undef _packed_binop
+#undef __packed_addsub
+#undef __packed_shift
+#undef __packed_binop
#if defined(__cplusplus)
}
>From 8a0e30da6dc00aa1affdf65866632c1d316a63d6 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 12 Mar 2026 19:44:17 +0800
Subject: [PATCH 06/19] [Clang][RISCV] Mask shift amounts in P extension
intrinsics to avoid UB
---
clang/lib/Headers/riscv_packed.h | 60 +++--
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 312 ++++++++++++----------
2 files changed, 210 insertions(+), 162 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed.h b/clang/lib/Headers/riscv_packed.h
index b201c1f1d3f0b..50095bef7ddb3 100644
--- a/clang/lib/Headers/riscv_packed.h
+++ b/clang/lib/Headers/riscv_packed.h
@@ -30,14 +30,20 @@ typedef uint16_t uint16x4_t __attribute__((__vector_size__(8), __aligned__(8)));
typedef int32_t int32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
typedef uint32_t uint32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
-#define __packed_binop(name, retty, ty1, ty2, op) \
- static __inline__ retty __attribute__((__always_inline__, __nodebug__)) \
- __riscv_##name(ty1 __rs1, ty2 __rs2) { \
+#define __packed_addsub(name, ty, op) \
+ static __inline__ ty __attribute__((__always_inline__, __nodebug__)) \
+ __riscv_##name(ty __rs1, ty __rs2) { \
return __rs1 op __rs2; \
}
-#define __packed_addsub(name, ty, op) __packed_binop(name, ty, ty, ty, op)
-#define __packed_shift(name, ty, op) __packed_binop(name, ty, ty, unsigned, op)
+#define __packed_shift(name, ty, op, mask) \
+ static __inline__ ty __attribute__((__always_inline__, __nodebug__)) \
+ __riscv_##name(ty __rs1, unsigned __rs2) { \
+ return __rs1 op (__rs2 & (mask)); \
+ }
+#define __packed_shift8(name, ty, op) __packed_shift(name, ty, op, 0x7)
+#define __packed_shift16(name, ty, op) __packed_shift(name, ty, op, 0xf)
+#define __packed_shift32(name, ty, op) __packed_shift(name, ty, op, 0x1f)
/* Packed Addition and Subtraction (32-bit) */
__packed_addsub(padd_i8x4, int8x4_t, +)
@@ -64,32 +70,34 @@ __packed_addsub(psub_i32x2, int32x2_t, -)
__packed_addsub(psub_u32x2, uint32x2_t, -)
/* Packed Shifts (32-bit) */
-__packed_shift(psll_s_u8x4, uint8x4_t, <<)
-__packed_shift(psll_s_i8x4, int8x4_t, <<)
-__packed_shift(psll_s_u16x2, uint16x2_t, <<)
-__packed_shift(psll_s_i16x2, int16x2_t, <<)
-__packed_shift(psrl_s_u8x4, uint8x4_t, >>)
-__packed_shift(psrl_s_u16x2, uint16x2_t, >>)
-__packed_shift(psra_s_i8x4, int8x4_t, >>)
-__packed_shift(psra_s_i16x2, int16x2_t, >>)
+__packed_shift8(psll_s_u8x4, uint8x4_t, <<)
+__packed_shift8(psll_s_i8x4, int8x4_t, <<)
+__packed_shift16(psll_s_u16x2, uint16x2_t, <<)
+__packed_shift16(psll_s_i16x2, int16x2_t, <<)
+__packed_shift8(psrl_s_u8x4, uint8x4_t, >>)
+__packed_shift16(psrl_s_u16x2, uint16x2_t, >>)
+__packed_shift8(psra_s_i8x4, int8x4_t, >>)
+__packed_shift16(psra_s_i16x2, int16x2_t, >>)
/* Packed Shifts (64-bit) */
-__packed_shift(psll_s_u8x8, uint8x8_t, <<)
-__packed_shift(psll_s_i8x8, int8x8_t, <<)
-__packed_shift(psll_s_u16x4, uint16x4_t, <<)
-__packed_shift(psll_s_i16x4, int16x4_t, <<)
-__packed_shift(psll_s_u32x2, uint32x2_t, <<)
-__packed_shift(psll_s_i32x2, int32x2_t, <<)
-__packed_shift(psrl_s_u8x8, uint8x8_t, >>)
-__packed_shift(psrl_s_u16x4, uint16x4_t, >>)
-__packed_shift(psrl_s_u32x2, uint32x2_t, >>)
-__packed_shift(psra_s_i8x8, int8x8_t, >>)
-__packed_shift(psra_s_i16x4, int16x4_t, >>)
-__packed_shift(psra_s_i32x2, int32x2_t, >>)
+__packed_shift8(psll_s_u8x8, uint8x8_t, <<)
+__packed_shift8(psll_s_i8x8, int8x8_t, <<)
+__packed_shift16(psll_s_u16x4, uint16x4_t, <<)
+__packed_shift16(psll_s_i16x4, int16x4_t, <<)
+__packed_shift32(psll_s_u32x2, uint32x2_t, <<)
+__packed_shift32(psll_s_i32x2, int32x2_t, <<)
+__packed_shift8(psrl_s_u8x8, uint8x8_t, >>)
+__packed_shift16(psrl_s_u16x4, uint16x4_t, >>)
+__packed_shift32(psrl_s_u32x2, uint32x2_t, >>)
+__packed_shift8(psra_s_i8x8, int8x8_t, >>)
+__packed_shift16(psra_s_i16x4, int16x4_t, >>)
+__packed_shift32(psra_s_i32x2, int32x2_t, >>)
#undef __packed_addsub
#undef __packed_shift
-#undef __packed_binop
+#undef __packed_shift8
+#undef __packed_shift16
+#undef __packed_shift32
#if defined(__cplusplus)
}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index c80a6ad4e95e7..1c2899684ca39 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -483,11 +483,12 @@ uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psll_s_i8x4(
// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
@@ -495,11 +496,12 @@ uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
//
int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
@@ -511,11 +513,12 @@ int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psll_s_u8x4(
// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
@@ -523,11 +526,12 @@ int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
//
uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
@@ -539,11 +543,12 @@ uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psll_s_i16x2(
// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
@@ -551,11 +556,12 @@ uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
//
int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
@@ -567,11 +573,12 @@ int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psll_s_u16x2(
// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
@@ -579,11 +586,12 @@ int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
//
uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
@@ -595,11 +603,12 @@ uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psra_s_i8x4(
// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
@@ -607,11 +616,12 @@ uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
//
int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
@@ -623,11 +633,12 @@ int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x4(
// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
@@ -635,11 +646,12 @@ int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i8> poison, i8 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP2]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
//
uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
@@ -651,11 +663,12 @@ uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psra_s_i16x2(
// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
@@ -663,11 +676,12 @@ uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
//
int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
@@ -679,11 +693,12 @@ int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x2(
// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
@@ -691,11 +706,12 @@ int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <2 x i16> poison, i16 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP2]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP3]] to i64
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
//
uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned shamt) {
@@ -709,22 +725,24 @@ uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psll_s_i8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP3]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned shamt) {
return __riscv_psll_s_i8x8(a, shamt);
@@ -735,22 +753,24 @@ int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psll_s_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP3]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned shamt) {
return __riscv_psll_s_u8x8(a, shamt);
@@ -761,22 +781,24 @@ uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psll_s_i16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP3]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned shamt) {
return __riscv_psll_s_i16x4(a, shamt);
@@ -787,22 +809,24 @@ int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psll_s_u16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP3]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned shamt) {
return __riscv_psll_s_u16x4(a, shamt);
@@ -812,7 +836,8 @@ uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned shamt) {
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
@@ -822,7 +847,8 @@ uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned shamt) {
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
@@ -836,7 +862,8 @@ int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned shamt) {
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
@@ -846,7 +873,8 @@ int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned shamt) {
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
@@ -861,22 +889,24 @@ uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psra_s_i8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP3]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned shamt) {
return __riscv_psra_s_i8x8(a, shamt);
@@ -887,22 +917,24 @@ int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <8 x i8> poison, i8 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP2]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP3]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned shamt) {
return __riscv_psrl_s_u8x8(a, shamt);
@@ -913,22 +945,24 @@ uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psra_s_i16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP3]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned shamt) {
return __riscv_psra_s_i16x4(a, shamt);
@@ -939,22 +973,24 @@ int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned shamt) {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP3]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = insertelement <4 x i16> poison, i16 [[TMP1]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP2]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP3:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP3]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned shamt) {
return __riscv_psrl_s_u16x4(a, shamt);
@@ -964,7 +1000,8 @@ uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned shamt) {
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
@@ -974,7 +1011,8 @@ uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned shamt) {
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
@@ -988,7 +1026,8 @@ int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned shamt) {
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
@@ -998,7 +1037,8 @@ int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned shamt) {
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[SHAMT]], i64 0
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
>From c1de06b23fee896e06e4edf027f3cfe8336a4cf7 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Wed, 1 Apr 2026 09:33:42 +0000
Subject: [PATCH 07/19] rebase: sync with main
---
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 184 +++++++++-------------
1 file changed, 72 insertions(+), 112 deletions(-)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index 1c2899684ca39..4f22e51216dd4 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -19,17 +19,14 @@
// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_i8x4(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// RV64-LABEL: define dso_local i32 @test_padd_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
-// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <4 x i8>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP2]]
//
int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
return __riscv_padd_i8x4(a, b);
@@ -44,17 +41,14 @@ int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_u8x4(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
-// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <4 x i8>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP2]]
//
uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) {
return __riscv_padd_u8x4(a, b);
@@ -69,17 +63,14 @@ uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) {
// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_i16x2(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
-// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <2 x i16>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP2]]
//
int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) {
return __riscv_padd_i16x2(a, b);
@@ -94,17 +85,14 @@ int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) {
// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_u16x2(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
-// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <2 x i16>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP2]]
//
uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) {
return __riscv_padd_u16x2(a, b);
@@ -119,17 +107,14 @@ uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) {
// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_i8x4(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
-// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <4 x i8>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP2]]
//
int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) {
return __riscv_psub_i8x4(a, b);
@@ -144,17 +129,14 @@ int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) {
// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_u8x4(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
-// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <4 x i8>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP2]]
//
uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) {
return __riscv_psub_u8x4(a, b);
@@ -169,17 +151,14 @@ uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) {
// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_i16x2(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
-// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <2 x i16>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP2]]
//
int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) {
return __riscv_psub_i16x2(a, b);
@@ -194,17 +173,14 @@ int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) {
// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_u16x2(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i64 [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
-// RV64-NEXT: [[COERCE_VAL_II1_I:%.*]] = trunc i64 [[B_COERCE]] to i32
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[COERCE_VAL_II1_I]] to <2 x i16>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP2]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP2]]
//
uint16x2_t test_psub_u16x2(uint16x2_t a, uint16x2_t b) {
return __riscv_psub_u16x2(a, b);
@@ -490,19 +466,17 @@ uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_i8x4(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psll_s_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP4]]
//
int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
return __riscv_psll_s_i8x4(a, shamt);
@@ -520,19 +494,17 @@ int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_u8x4(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psll_s_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP4]]
//
uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
return __riscv_psll_s_u8x4(a, shamt);
@@ -550,19 +522,17 @@ uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_i16x2(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psll_s_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP4]]
//
int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
return __riscv_psll_s_i16x2(a, shamt);
@@ -580,19 +550,17 @@ int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_u16x2(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psll_s_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP4]]
//
uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
return __riscv_psll_s_u16x2(a, shamt);
@@ -610,19 +578,17 @@ uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psra_s_i8x4(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psra_s_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP4]]
//
int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
return __riscv_psra_s_i8x4(a, shamt);
@@ -640,19 +606,17 @@ int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x4(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psrl_s_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <4 x i8>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP4]]
//
uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
return __riscv_psrl_s_u8x4(a, shamt);
@@ -670,19 +634,17 @@ uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psra_s_i16x2(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psra_s_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP4]]
//
int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
return __riscv_psra_s_i16x2(a, shamt);
@@ -700,19 +662,17 @@ int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x2(
-// RV64-SAME: i64 [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psrl_s_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[COERCE_VAL_II_I:%.*]] = trunc i64 [[A_COERCE]] to i32
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[COERCE_VAL_II_I]] to <2 x i16>
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV64-NEXT: [[RETVAL_COERCE_0_INSERT_EXT:%.*]] = zext i32 [[TMP4]] to i64
-// RV64-NEXT: ret i64 [[RETVAL_COERCE_0_INSERT_EXT]]
+// RV64-NEXT: ret i32 [[TMP4]]
//
uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned shamt) {
return __riscv_psrl_s_u16x2(a, shamt);
>From f2b32787160f35f39a6d8955f03f9e06a4c6e164 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 3 Apr 2026 01:58:33 +0000
Subject: [PATCH 08/19] [RISCV] add more P intrinsics support
Add the pmv_s, padd_s, pneg, pand/por/pxor/pnot, and pmin/pmax (including pminu/pmaxu) intrinsics.
TODO: add codegen support for padd_s.
---
clang/lib/Headers/riscv_packed.h | 199 +-
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 2948 ++++++++++++++++-----
2 files changed, 2483 insertions(+), 664 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed.h b/clang/lib/Headers/riscv_packed.h
index 50095bef7ddb3..c7605de340faa 100644
--- a/clang/lib/Headers/riscv_packed.h
+++ b/clang/lib/Headers/riscv_packed.h
@@ -30,14 +30,19 @@ typedef uint16_t uint16x4_t __attribute__((__vector_size__(8), __aligned__(8)));
typedef int32_t int32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
typedef uint32_t uint32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
-#define __packed_addsub(name, ty, op) \
- static __inline__ ty __attribute__((__always_inline__, __nodebug__)) \
- __riscv_##name(ty __rs1, ty __rs2) { \
- return __rs1 op __rs2; \
+#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
+
+#define __packed_splat2(ty, x) ((ty){(x), (x)})
+#define __packed_splat4(ty, x) ((ty){(x), (x), (x), (x)})
+#define __packed_splat8(ty, x) ((ty){(x), (x), (x), (x), (x), (x), (x), (x)})
+
+#define __packed_splat(name, ty, scalar_ty, splat) \
+ static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(scalar_ty __x) { \
+ return splat(ty, __x); \
}
#define __packed_shift(name, ty, op, mask) \
- static __inline__ ty __attribute__((__always_inline__, __nodebug__)) \
+ static __inline__ ty __DEFAULT_FN_ATTRS \
__riscv_##name(ty __rs1, unsigned __rs2) { \
return __rs1 op (__rs2 & (mask)); \
}
@@ -45,29 +50,42 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
#define __packed_shift16(name, ty, op) __packed_shift(name, ty, op, 0xf)
#define __packed_shift32(name, ty, op) __packed_shift(name, ty, op, 0x1f)
-/* Packed Addition and Subtraction (32-bit) */
-__packed_addsub(padd_i8x4, int8x4_t, +)
-__packed_addsub(padd_u8x4, uint8x4_t, +)
-__packed_addsub(padd_i16x2, int16x2_t, +)
-__packed_addsub(padd_u16x2, uint16x2_t, +)
-__packed_addsub(psub_i8x4, int8x4_t, -)
-__packed_addsub(psub_u8x4, uint8x4_t, -)
-__packed_addsub(psub_i16x2, int16x2_t, -)
-__packed_addsub(psub_u16x2, uint16x2_t, -)
+#define __packed_scalar_binary_op(name, ty, scalar_ty, op, splat) \
+ static __inline__ ty __DEFAULT_FN_ATTRS \
+ __riscv_##name(ty __rs1, scalar_ty __rs2) { \
+ return __rs1 op splat(ty, __rs2); \
+ }
-/* Packed Addition and Subtraction (64-bit) */
-__packed_addsub(padd_i8x8, int8x8_t, +)
-__packed_addsub(padd_u8x8, uint8x8_t, +)
-__packed_addsub(padd_i16x4, int16x4_t, +)
-__packed_addsub(padd_u16x4, uint16x4_t, +)
-__packed_addsub(padd_i32x2, int32x2_t, +)
-__packed_addsub(padd_u32x2, uint32x2_t, +)
-__packed_addsub(psub_i8x8, int8x8_t, -)
-__packed_addsub(psub_u8x8, uint8x8_t, -)
-__packed_addsub(psub_i16x4, int16x4_t, -)
-__packed_addsub(psub_u16x4, uint16x4_t, -)
-__packed_addsub(psub_i32x2, int32x2_t, -)
-__packed_addsub(psub_u32x2, uint32x2_t, -)
+#define __packed_binary_op(name, ty, op) \
+ static __inline__ ty __DEFAULT_FN_ATTRS \
+ __riscv_##name(ty __rs1, ty __rs2) { \
+ return __rs1 op __rs2; \
+ }
+
+#define __packed_unary_op(name, ty, op) \
+ static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1) { \
+ return op __rs1; \
+ }
+
+#define __packed_minmax(name, ty, builtin) \
+ static __inline__ ty __DEFAULT_FN_ATTRS \
+ __riscv_##name(ty __rs1, ty __rs2) { \
+ return builtin(__rs1, __rs2); \
+ }
+
+/* Packed Splat (32-bit) */
+__packed_splat(pmv_s_u8x4, uint8x4_t, uint8_t, __packed_splat4)
+__packed_splat(pmv_s_i8x4, int8x4_t, int8_t, __packed_splat4)
+__packed_splat(pmv_s_u16x2, uint16x2_t, uint16_t, __packed_splat2)
+__packed_splat(pmv_s_i16x2, int16x2_t, int16_t, __packed_splat2)
+
+/* Packed Splat (64-bit) */
+__packed_splat(pmv_s_u8x8, uint8x8_t, uint8_t, __packed_splat8)
+__packed_splat(pmv_s_i8x8, int8x8_t, int8_t, __packed_splat8)
+__packed_splat(pmv_s_u16x4, uint16x4_t, uint16_t, __packed_splat4)
+__packed_splat(pmv_s_i16x4, int16x4_t, int16_t, __packed_splat4)
+__packed_splat(pmv_s_u32x2, uint32x2_t, uint32_t, __packed_splat2)
+__packed_splat(pmv_s_i32x2, int32x2_t, int32_t, __packed_splat2)
/* Packed Shifts (32-bit) */
__packed_shift8(psll_s_u8x4, uint8x4_t, <<)
@@ -93,11 +111,136 @@ __packed_shift8(psra_s_i8x8, int8x8_t, >>)
__packed_shift16(psra_s_i16x4, int16x4_t, >>)
__packed_shift32(psra_s_i32x2, int32x2_t, >>)
-#undef __packed_addsub
+/* Packed Addition with Scalar (32-bit) */
+__packed_scalar_binary_op(padd_s_u8x4, uint8x4_t, uint8_t, +, __packed_splat4)
+__packed_scalar_binary_op(padd_s_i8x4, int8x4_t, int8_t, +, __packed_splat4)
+__packed_scalar_binary_op(padd_s_u16x2, uint16x2_t, uint16_t, +,
+ __packed_splat2)
+__packed_scalar_binary_op(padd_s_i16x2, int16x2_t, int16_t, +,
+ __packed_splat2)
+
+/* Packed Addition with Scalar (64-bit) */
+__packed_scalar_binary_op(padd_s_u8x8, uint8x8_t, uint8_t, +, __packed_splat8)
+__packed_scalar_binary_op(padd_s_i8x8, int8x8_t, int8_t, +, __packed_splat8)
+__packed_scalar_binary_op(padd_s_u16x4, uint16x4_t, uint16_t, +,
+ __packed_splat4)
+__packed_scalar_binary_op(padd_s_i16x4, int16x4_t, int16_t, +,
+ __packed_splat4)
+__packed_scalar_binary_op(padd_s_u32x2, uint32x2_t, uint32_t, +,
+ __packed_splat2)
+__packed_scalar_binary_op(padd_s_i32x2, int32x2_t, int32_t, +,
+ __packed_splat2)
+
+/* Packed Addition and Subtraction (32-bit) */
+__packed_binary_op(padd_i8x4, int8x4_t, +)
+__packed_binary_op(padd_u8x4, uint8x4_t, +)
+__packed_binary_op(padd_i16x2, int16x2_t, +)
+__packed_binary_op(padd_u16x2, uint16x2_t, +)
+__packed_binary_op(psub_i8x4, int8x4_t, -)
+__packed_binary_op(psub_u8x4, uint8x4_t, -)
+__packed_binary_op(psub_i16x2, int16x2_t, -)
+__packed_binary_op(psub_u16x2, uint16x2_t, -)
+__packed_unary_op(pneg_i8x4, int8x4_t, -)
+__packed_unary_op(pneg_i16x2, int16x2_t, -)
+
+/* Packed Addition and Subtraction (64-bit) */
+__packed_binary_op(padd_i8x8, int8x8_t, +)
+__packed_binary_op(padd_u8x8, uint8x8_t, +)
+__packed_binary_op(padd_i16x4, int16x4_t, +)
+__packed_binary_op(padd_u16x4, uint16x4_t, +)
+__packed_binary_op(padd_i32x2, int32x2_t, +)
+__packed_binary_op(padd_u32x2, uint32x2_t, +)
+__packed_binary_op(psub_i8x8, int8x8_t, -)
+__packed_binary_op(psub_u8x8, uint8x8_t, -)
+__packed_binary_op(psub_i16x4, int16x4_t, -)
+__packed_binary_op(psub_u16x4, uint16x4_t, -)
+__packed_binary_op(psub_i32x2, int32x2_t, -)
+__packed_binary_op(psub_u32x2, uint32x2_t, -)
+__packed_unary_op(pneg_i8x8, int8x8_t, -)
+__packed_unary_op(pneg_i16x4, int16x4_t, -)
+__packed_unary_op(pneg_i32x2, int32x2_t, -)
+
+/* Packed Minimum and Maximum (32-bit) */
+__packed_minmax(pmin_i8x4, int8x4_t, __builtin_elementwise_min)
+__packed_minmax(pmin_i16x2, int16x2_t, __builtin_elementwise_min)
+__packed_minmax(pminu_u8x4, uint8x4_t, __builtin_elementwise_min)
+__packed_minmax(pminu_u16x2, uint16x2_t, __builtin_elementwise_min)
+__packed_minmax(pmax_i8x4, int8x4_t, __builtin_elementwise_max)
+__packed_minmax(pmax_i16x2, int16x2_t, __builtin_elementwise_max)
+__packed_minmax(pmaxu_u8x4, uint8x4_t, __builtin_elementwise_max)
+__packed_minmax(pmaxu_u16x2, uint16x2_t, __builtin_elementwise_max)
+
+/* Packed Minimum and Maximum (64-bit) */
+__packed_minmax(pmin_i8x8, int8x8_t, __builtin_elementwise_min)
+__packed_minmax(pmin_i16x4, int16x4_t, __builtin_elementwise_min)
+__packed_minmax(pmin_i32x2, int32x2_t, __builtin_elementwise_min)
+__packed_minmax(pminu_u8x8, uint8x8_t, __builtin_elementwise_min)
+__packed_minmax(pminu_u16x4, uint16x4_t, __builtin_elementwise_min)
+__packed_minmax(pminu_u32x2, uint32x2_t, __builtin_elementwise_min)
+__packed_minmax(pmax_i8x8, int8x8_t, __builtin_elementwise_max)
+__packed_minmax(pmax_i16x4, int16x4_t, __builtin_elementwise_max)
+__packed_minmax(pmax_i32x2, int32x2_t, __builtin_elementwise_max)
+__packed_minmax(pmaxu_u8x8, uint8x8_t, __builtin_elementwise_max)
+__packed_minmax(pmaxu_u16x4, uint16x4_t, __builtin_elementwise_max)
+__packed_minmax(pmaxu_u32x2, uint32x2_t, __builtin_elementwise_max)
+
+/* Packed Logical Operations (32-bit) */
+__packed_binary_op(pand_i8x4, int8x4_t, &)
+__packed_binary_op(pand_u8x4, uint8x4_t, &)
+__packed_binary_op(pand_i16x2, int16x2_t, &)
+__packed_binary_op(pand_u16x2, uint16x2_t, &)
+__packed_binary_op(por_i8x4, int8x4_t, |)
+__packed_binary_op(por_u8x4, uint8x4_t, |)
+__packed_binary_op(por_i16x2, int16x2_t, |)
+__packed_binary_op(por_u16x2, uint16x2_t, |)
+__packed_binary_op(pxor_i8x4, int8x4_t, ^)
+__packed_binary_op(pxor_u8x4, uint8x4_t, ^)
+__packed_binary_op(pxor_i16x2, int16x2_t, ^)
+__packed_binary_op(pxor_u16x2, uint16x2_t, ^)
+__packed_unary_op(pnot_i8x4, int8x4_t, ~)
+__packed_unary_op(pnot_u8x4, uint8x4_t, ~)
+__packed_unary_op(pnot_i16x2, int16x2_t, ~)
+__packed_unary_op(pnot_u16x2, uint16x2_t, ~)
+
+/* Packed Logical Operations (64-bit) */
+__packed_binary_op(pand_i8x8, int8x8_t, &)
+__packed_binary_op(pand_u8x8, uint8x8_t, &)
+__packed_binary_op(pand_i16x4, int16x4_t, &)
+__packed_binary_op(pand_u16x4, uint16x4_t, &)
+__packed_binary_op(pand_i32x2, int32x2_t, &)
+__packed_binary_op(pand_u32x2, uint32x2_t, &)
+__packed_binary_op(por_i8x8, int8x8_t, |)
+__packed_binary_op(por_u8x8, uint8x8_t, |)
+__packed_binary_op(por_i16x4, int16x4_t, |)
+__packed_binary_op(por_u16x4, uint16x4_t, |)
+__packed_binary_op(por_i32x2, int32x2_t, |)
+__packed_binary_op(por_u32x2, uint32x2_t, |)
+__packed_binary_op(pxor_i8x8, int8x8_t, ^)
+__packed_binary_op(pxor_u8x8, uint8x8_t, ^)
+__packed_binary_op(pxor_i16x4, int16x4_t, ^)
+__packed_binary_op(pxor_u16x4, uint16x4_t, ^)
+__packed_binary_op(pxor_i32x2, int32x2_t, ^)
+__packed_binary_op(pxor_u32x2, uint32x2_t, ^)
+__packed_unary_op(pnot_i8x8, int8x8_t, ~)
+__packed_unary_op(pnot_u8x8, uint8x8_t, ~)
+__packed_unary_op(pnot_i16x4, int16x4_t, ~)
+__packed_unary_op(pnot_u16x4, uint16x4_t, ~)
+__packed_unary_op(pnot_i32x2, int32x2_t, ~)
+__packed_unary_op(pnot_u32x2, uint32x2_t, ~)
+
+#undef __packed_splat2
+#undef __packed_splat4
+#undef __packed_splat8
+#undef __packed_splat
#undef __packed_shift
#undef __packed_shift8
#undef __packed_shift16
#undef __packed_shift32
+#undef __packed_scalar_binary_op
+#undef __packed_binary_op
+#undef __packed_unary_op
+#undef __packed_minmax
+#undef __DEFAULT_FN_ATTRS
#if defined(__cplusplus)
}
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index 4f22e51216dd4..e79c98dfd93a5 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -8,1002 +8,2678 @@
#include <riscv_packed.h>
-/* 32-bit Packed Addition and Subtraction */
+/* Packed Splat (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_pmv_s_u8x4(
+// RV32-SAME: i8 noundef zeroext [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i64 0
+// RV32-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VECINIT3_I]] to i32
+// RV32-NEXT: ret i32 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmv_s_u8x4(
+// RV64-SAME: i8 noundef zeroext [[X:%.*]]) #[[ATTR0:[0-9]+]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i64 0
+// RV64-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VECINIT3_I]] to i32
+// RV64-NEXT: ret i32 [[TMP0]]
+//
+uint8x4_t test_pmv_s_u8x4(uint8_t x) {
+ return __riscv_pmv_s_u8x4(x); // checks above pin: x (zeroext) broadcast to all lanes of <4 x i8>, returned as i32
+}
+
+// RV32-LABEL: define dso_local i32 @test_pmv_s_i8x4(
+// RV32-SAME: i8 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i64 0
+// RV32-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VECINIT3_I]] to i32
+// RV32-NEXT: ret i32 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmv_s_i8x4(
+// RV64-SAME: i8 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[X]], i64 0
+// RV64-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <4 x i8> [[VECINIT3_I]] to i32
+// RV64-NEXT: ret i32 [[TMP0]]
+//
+int8x4_t test_pmv_s_i8x4(int8_t x) {
+ return __riscv_pmv_s_i8x4(x); // checks above pin: x (signext) broadcast to all lanes of <4 x i8>, returned as i32
+}
+
+// RV32-LABEL: define dso_local i32 @test_pmv_s_u16x2(
+// RV32-SAME: i16 noundef zeroext [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i16> poison, i16 [[X]], i64 0
+// RV32-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i16> [[VECINIT_I]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VECINIT1_I]] to i32
+// RV32-NEXT: ret i32 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmv_s_u16x2(
+// RV64-SAME: i16 noundef zeroext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i16> poison, i16 [[X]], i64 0
+// RV64-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i16> [[VECINIT_I]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VECINIT1_I]] to i32
+// RV64-NEXT: ret i32 [[TMP0]]
+//
+uint16x2_t test_pmv_s_u16x2(uint16_t x) {
+ return __riscv_pmv_s_u16x2(x); // checks above pin: x (zeroext) broadcast to both lanes of <2 x i16>, returned as i32
+}
+
+// RV32-LABEL: define dso_local i32 @test_pmv_s_i16x2(
+// RV32-SAME: i16 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i16> poison, i16 [[X]], i64 0
+// RV32-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i16> [[VECINIT_I]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VECINIT1_I]] to i32
+// RV32-NEXT: ret i32 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmv_s_i16x2(
+// RV64-SAME: i16 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i16> poison, i16 [[X]], i64 0
+// RV64-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i16> [[VECINIT_I]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <2 x i16> [[VECINIT1_I]] to i32
+// RV64-NEXT: ret i32 [[TMP0]]
+//
+int16x2_t test_pmv_s_i16x2(int16_t x) {
+ return __riscv_pmv_s_i16x2(x); // checks above pin: x (signext) broadcast to both lanes of <2 x i16>, returned as i32
+}
+
+/* Packed Splat (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_pmv_s_u8x8(
+// RV32-SAME: i8 noundef zeroext [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[X]], i64 0
+// RV32-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[VECINIT7_I]] to i64
+// RV32-NEXT: ret i64 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmv_s_u8x8(
+// RV64-SAME: i8 noundef zeroext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[X]], i64 0
+// RV64-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[VECINIT7_I]] to i64
+// RV64-NEXT: ret i64 [[TMP0]]
+//
+uint8x8_t test_pmv_s_u8x8(uint8_t x) {
+ return __riscv_pmv_s_u8x8(x); // checks above pin: x (zeroext) broadcast to all lanes of <8 x i8>, returned as i64
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmv_s_i8x8(
+// RV32-SAME: i8 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[X]], i64 0
+// RV32-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[VECINIT7_I]] to i64
+// RV32-NEXT: ret i64 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmv_s_i8x8(
+// RV64-SAME: i8 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[X]], i64 0
+// RV64-NEXT: [[VECINIT7_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <8 x i8> [[VECINIT7_I]] to i64
+// RV64-NEXT: ret i64 [[TMP0]]
+//
+int8x8_t test_pmv_s_i8x8(int8_t x) {
+ return __riscv_pmv_s_i8x8(x); // checks above pin: x (signext) broadcast to all lanes of <8 x i8>, returned as i64
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmv_s_u16x4(
+// RV32-SAME: i16 noundef zeroext [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
+// RV32-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i16> [[VECINIT_I]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to i64
+// RV32-NEXT: ret i64 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmv_s_u16x4(
+// RV64-SAME: i16 noundef zeroext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
+// RV64-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i16> [[VECINIT_I]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to i64
+// RV64-NEXT: ret i64 [[TMP0]]
+//
+uint16x4_t test_pmv_s_u16x4(uint16_t x) {
+ return __riscv_pmv_s_u16x4(x); // checks above pin: x (zeroext) broadcast to all lanes of <4 x i16>, returned as i64
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmv_s_i16x4(
+// RV32-SAME: i16 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
+// RV32-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i16> [[VECINIT_I]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to i64
+// RV32-NEXT: ret i64 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmv_s_i16x4(
+// RV64-SAME: i16 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[X]], i64 0
+// RV64-NEXT: [[VECINIT3_I:%.*]] = shufflevector <4 x i16> [[VECINIT_I]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <4 x i16> [[VECINIT3_I]] to i64
+// RV64-NEXT: ret i64 [[TMP0]]
+//
+int16x4_t test_pmv_s_i16x4(int16_t x) {
+ return __riscv_pmv_s_i16x4(x); // checks above pin: x (signext) broadcast to all lanes of <4 x i16>, returned as i64
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmv_s_u32x2(
+// RV32-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i64 0
+// RV32-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i32> [[VECINIT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to i64
+// RV32-NEXT: ret i64 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmv_s_u32x2(
+// RV64-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i64 0
+// RV64-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i32> [[VECINIT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to i64
+// RV64-NEXT: ret i64 [[TMP0]]
+//
+uint32x2_t test_pmv_s_u32x2(uint32_t x) {
+ return __riscv_pmv_s_u32x2(x); // checks above pin: x broadcast to both lanes of <2 x i32>, returned as i64
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmv_s_i32x2(
+// RV32-SAME: i32 noundef [[X:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i64 0
+// RV32-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i32> [[VECINIT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to i64
+// RV32-NEXT: ret i64 [[TMP0]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmv_s_i32x2(
+// RV64-SAME: i32 noundef signext [[X:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[X]], i64 0
+// RV64-NEXT: [[VECINIT1_I:%.*]] = shufflevector <2 x i32> [[VECINIT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[TMP0:%.*]] = bitcast <2 x i32> [[VECINIT1_I]] to i64
+// RV64-NEXT: ret i64 [[TMP0]]
+//
+int32x2_t test_pmv_s_i32x2(int32_t x) {
+ return __riscv_pmv_s_i32x2(x); // checks above pin: x broadcast to both lanes of <2 x i32>, returned as i64
+}
+
+/* Packed Shifts (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_psll_s_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_psll_s_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
+//
+int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
+ return __riscv_psll_s_i8x4(a, shamt); // checks above pin: shamt truncated to i8, masked with 7, splatted, then shl on <4 x i8>
+}
+
+// RV32-LABEL: define dso_local i32 @test_psll_s_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_psll_s_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
+//
+uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
+ return __riscv_psll_s_u8x4(a, shamt); // checks above pin: shamt truncated to i8, masked with 7, splatted, then shl on <4 x i8>
+}
+
+// RV32-LABEL: define dso_local i32 @test_psll_s_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_psll_s_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
+//
+int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
+ return __riscv_psll_s_i16x2(a, shamt); // checks above pin: shamt truncated to i16, masked with 15, splatted, then shl on <2 x i16>
+}
+
+// RV32-LABEL: define dso_local i32 @test_psll_s_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_psll_s_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
+//
+uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
+ return __riscv_psll_s_u16x2(a, shamt); // checks above pin: shamt truncated to i16, masked with 15, splatted, then shl on <2 x i16>
+}
+
+// RV32-LABEL: define dso_local i32 @test_psra_s_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_psra_s_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
+//
+int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
+ return __riscv_psra_s_i8x4(a, shamt); // checks above pin: shamt truncated to i8, masked with 7, splatted, then ashr on <4 x i8>
+}
+
+// RV32-LABEL: define dso_local i32 @test_psrl_s_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_psrl_s_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
+//
+uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
+ return __riscv_psrl_s_u8x4(a, shamt); // checks above pin: shamt truncated to i8, masked with 7, splatted, then lshr on <4 x i8>
+}
+
+// RV32-LABEL: define dso_local i32 @test_psra_s_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_psra_s_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
+//
+int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
+ return __riscv_psra_s_i16x2(a, shamt); // checks above pin: shamt truncated to i16, masked with 15, splatted, then ashr on <2 x i16>
+}
+
+// RV32-LABEL: define dso_local i32 @test_psrl_s_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i32 @test_psrl_s_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
+//
+uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned shamt) {
+ return __riscv_psrl_s_u16x2(a, shamt); // checks above pin: shamt truncated to i16, masked with 15, splatted, then lshr on <2 x i16>
+}
+
+/* Packed Shifts (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
+//
+int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned shamt) {
+ return __riscv_psll_s_i8x8(a, shamt); // checks above pin: shamt truncated to i8, masked with 7, splatted, then shl on <8 x i8>
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
+//
+uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned shamt) {
+ return __riscv_psll_s_u8x8(a, shamt); // checks above pin: shamt truncated to i8, masked with 7, splatted, then shl on <8 x i8>
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
+//
+int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned shamt) {
+ return __riscv_psll_s_i16x4(a, shamt); // checks above pin: shamt truncated to i16, masked with 15, splatted, then shl on <4 x i16>
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
+//
+uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned shamt) {
+ return __riscv_psll_s_u16x4(a, shamt); // checks above pin: shamt truncated to i16, masked with 15, splatted, then shl on <4 x i16>
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned shamt) {
+ return __riscv_psll_s_i32x2(a, shamt); // checks above pin: shamt masked with 31 (no trunc), splatted, then shl on <2 x i32>
+}
+
+// RV32-LABEL: define dso_local i64 @test_psll_s_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_psll_s_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned shamt) {
+ return __riscv_psll_s_u32x2(a, shamt); // checks above pin: shamt masked with 31 (no trunc), splatted, then shl on <2 x i32>
+}
+
+// RV32-LABEL: define dso_local i64 @test_psra_s_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_psra_s_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
+//
+int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned shamt) {
+ return __riscv_psra_s_i8x8(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
+//
+uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned shamt) {
+ return __riscv_psrl_s_u8x8(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psra_s_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_psra_s_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
+//
+int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned shamt) {
+ return __riscv_psra_s_i16x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
+//
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
+//
+uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned shamt) {
+ return __riscv_psrl_s_u16x4(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psra_s_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_psra_s_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned shamt) {
+ return __riscv_psra_s_i32x2(a, shamt);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+uint32x2_t test_psrl_s_u32x2(uint32x2_t a, unsigned shamt) {
+ return __riscv_psrl_s_u32x2(a, shamt);
+}
+
+/* Packed Addition with Scalar (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_padd_s_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+// RV32-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_s_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+// RV64-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+uint8x4_t test_padd_s_u8x4(uint8x4_t a, uint8_t b) {
+ return __riscv_padd_s_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_s_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+// RV32-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_s_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+// RV64-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+int8x4_t test_padd_s_i8x4(int8x4_t a, int8_t b) {
+ return __riscv_padd_s_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_s_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i16 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i16> poison, i16 [[B]], i64 0
+// RV32-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i16> [[VECINIT_I]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[VECINIT2_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_s_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i16 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i16> poison, i16 [[B]], i64 0
+// RV64-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i16> [[VECINIT_I]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[VECINIT2_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+uint16x2_t test_padd_s_u16x2(uint16x2_t a, uint16_t b) {
+ return __riscv_padd_s_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_s_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i16> poison, i16 [[B]], i64 0
+// RV32-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i16> [[VECINIT_I]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[VECINIT2_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_s_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i16> poison, i16 [[B]], i64 0
+// RV64-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i16> [[VECINIT_I]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[VECINIT2_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+int16x2_t test_padd_s_i16x2(int16x2_t a, int16_t b) {
+ return __riscv_padd_s_i16x2(a, b);
+}
+
+/* Packed Addition with Scalar (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_padd_s_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// RV32-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[VECINIT8_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_s_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// RV64-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[VECINIT8_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+uint8x8_t test_padd_s_u8x8(uint8x8_t a, uint8_t b) {
+ return __riscv_padd_s_u8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_s_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i8 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// RV32-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[VECINIT8_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_s_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i8 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <8 x i8> poison, i8 [[B]], i64 0
+// RV64-NEXT: [[VECINIT8_I:%.*]] = shufflevector <8 x i8> [[VECINIT_I]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[VECINIT8_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int8x8_t test_padd_s_i8x8(int8x8_t a, int8_t b) {
+ return __riscv_padd_s_i8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_s_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i16 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// RV32-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i16> [[VECINIT_I]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[VECINIT4_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_s_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i16 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// RV64-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i16> [[VECINIT_I]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[VECINIT4_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+uint16x4_t test_padd_s_u16x4(uint16x4_t a, uint16_t b) {
+ return __riscv_padd_s_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_s_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// RV32-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i16> [[VECINIT_I]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[VECINIT4_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_s_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i16 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i16> poison, i16 [[B]], i64 0
+// RV64-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i16> [[VECINIT_I]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[VECINIT4_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int16x4_t test_padd_s_i16x4(int16x4_t a, int16_t b) {
+ return __riscv_padd_s_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_s_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i64 0
+// RV32-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i32> [[VECINIT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[VECINIT2_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_s_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i64 0
+// RV64-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i32> [[VECINIT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[VECINIT2_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+uint32x2_t test_padd_s_u32x2(uint32x2_t a, uint32_t b) {
+ return __riscv_padd_s_u32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_s_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i64 0
+// RV32-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i32> [[VECINIT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[VECINIT2_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_s_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <2 x i32> poison, i32 [[B]], i64 0
+// RV64-NEXT: [[VECINIT2_I:%.*]] = shufflevector <2 x i32> [[VECINIT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[VECINIT2_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int32x2_t test_padd_s_i32x2(int32x2_t a, int32_t b) {
+ return __riscv_padd_s_i32x2(a, b);
+}
+
+/* Packed Addition and Subtraction (32-bit) */
// RV32-LABEL: define dso_local i32 @test_padd_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_padd_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_padd_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_padd_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_padd_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_psub_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_psub_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psub_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint16x2_t test_psub_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psub_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pneg_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pneg_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+int8x4_t test_pneg_i8x4(int8x4_t a) {
+ return __riscv_pneg_i8x4(a);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pneg_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pneg_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+int16x2_t test_pneg_i16x2(int16x2_t a) {
+ return __riscv_pneg_i16x2(a);
+}
+
+/* Packed Addition and Subtraction (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int8x8_t test_padd_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_padd_i8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint8x8_t test_padd_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_padd_u8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_padd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_padd_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_padd_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_padd_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int32x2_t test_padd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_padd_i32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint32x2_t test_padd_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_padd_u32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int8x8_t test_psub_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_psub_i8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0:[0-9]+]] {
+// RV64-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
- return __riscv_padd_i8x4(a, b);
+uint8x8_t test_psub_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_psub_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV32-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_psub_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psub_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_psub_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psub_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int32x2_t test_psub_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psub_i32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psub_u32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pneg_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_pneg_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int8x8_t test_pneg_i8x8(int8x8_t a) {
+ return __riscv_pneg_i8x8(a);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pneg_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_pneg_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int16x4_t test_pneg_i16x4(int16x4_t a) {
+ return __riscv_pneg_i16x4(a);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pneg_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_pneg_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int32x2_t test_pneg_i32x2(int32x2_t a) {
+ return __riscv_pneg_i32x2(a);
+}
+
+/* Packed Minimum and Maximum (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_pmin_i8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV64-LABEL: define dso_local i32 @test_pmin_i8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) {
- return __riscv_padd_u8x4(a, b);
+int8x4_t test_pmin_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pmin_i8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV32-LABEL: define dso_local i32 @test_pmin_i16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV64-LABEL: define dso_local i32 @test_pmin_i16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_padd_i16x2(a, b);
+int16x2_t test_pmin_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pmin_i16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV32-LABEL: define dso_local i32 @test_pminu_u8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV64-LABEL: define dso_local i32 @test_pminu_u8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_padd_u16x2(a, b);
+uint8x4_t test_pminu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pminu_u8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV32-LABEL: define dso_local i32 @test_pminu_u16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV64-LABEL: define dso_local i32 @test_pminu_u16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) {
- return __riscv_psub_i8x4(a, b);
+uint16x2_t test_pminu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pminu_u16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV32-LABEL: define dso_local i32 @test_pmax_i8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV64-LABEL: define dso_local i32 @test_pmax_i8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) {
- return __riscv_psub_u8x4(a, b);
+int8x4_t test_pmax_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pmax_i8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV32-LABEL: define dso_local i32 @test_pmax_i16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV64-LABEL: define dso_local i32 @test_pmax_i16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_psub_i16x2(a, b);
+int16x2_t test_pmax_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pmax_i16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV32-LABEL: define dso_local i32 @test_pmaxu_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pmaxu_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint8x4_t test_pmaxu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pmaxu_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pmaxu_u16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV64-LABEL: define dso_local i32 @test_pmaxu_u16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint16x2_t test_psub_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_psub_u16x2(a, b);
+uint16x2_t test_pmaxu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pmaxu_u16x2(a, b);
+}
+
+/* Packed Minimum and Maximum (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_pmin_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.smin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmin_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.smin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int8x8_t test_pmin_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pmin_i8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmin_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmin_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_pmin_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pmin_i16x4(a, b);
}
-/* 64-bit Packed Addition and Subtraction */
-
-// RV32-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV32-LABEL: define dso_local i64 @test_pmin_i32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV64-LABEL: define dso_local i64 @test_pmin_i32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_padd_i8x8(int8x8_t a, int8x8_t b) {
- return __riscv_padd_i8x8(a, b);
+int32x2_t test_pmin_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pmin_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV32-LABEL: define dso_local i64 @test_pminu_u8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV64-LABEL: define dso_local i64 @test_pminu_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint8x8_t test_padd_u8x8(uint8x8_t a, uint8x8_t b) {
- return __riscv_padd_u8x8(a, b);
+uint8x8_t test_pminu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pminu_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV32-LABEL: define dso_local i64 @test_pminu_u16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV64-LABEL: define dso_local i64 @test_pminu_u16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_padd_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_padd_i16x4(a, b);
+uint16x4_t test_pminu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pminu_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV32-LABEL: define dso_local i64 @test_pminu_u32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV64-LABEL: define dso_local i64 @test_pminu_u32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint16x4_t test_padd_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_padd_u16x4(a, b);
+uint32x2_t test_pminu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pminu_u32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV32-LABEL: define dso_local i64 @test_pmax_i8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV64-LABEL: define dso_local i64 @test_pmax_i8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int32x2_t test_padd_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_padd_i32x2(a, b);
+int8x8_t test_pmax_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pmax_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV32-LABEL: define dso_local i64 @test_pmax_i16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV64-LABEL: define dso_local i64 @test_pmax_i16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint32x2_t test_padd_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_padd_u32x2(a, b);
+int16x4_t test_pmax_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pmax_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV32-LABEL: define dso_local i64 @test_pmax_i32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV64-LABEL: define dso_local i64 @test_pmax_i32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_psub_i8x8(int8x8_t a, int8x8_t b) {
- return __riscv_psub_i8x8(a, b);
+int32x2_t test_pmax_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pmax_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV32-LABEL: define dso_local i64 @test_pmaxu_u8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV64-LABEL: define dso_local i64 @test_pmaxu_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint8x8_t test_psub_u8x8(uint8x8_t a, uint8x8_t b) {
- return __riscv_psub_u8x8(a, b);
+uint8x8_t test_pmaxu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pmaxu_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV32-LABEL: define dso_local i64 @test_pmaxu_u16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.umax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV64-LABEL: define dso_local i64 @test_pmaxu_u16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.umax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_pmaxu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pmaxu_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pmaxu_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pmaxu_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_psub_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_psub_i16x4(a, b);
+uint32x2_t test_pmaxu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pmaxu_u32x2(a, b);
+}
+
+/* Packed Logical Operations (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_pand_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[AND_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pand_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[AND_I1]]
+//
+int8x4_t test_pand_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pand_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pand_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[AND_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pand_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[AND_I1]]
+//
+uint8x4_t test_pand_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pand_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pand_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[AND_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pand_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[AND_I1]]
+//
+int16x2_t test_pand_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pand_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pand_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[AND_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pand_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[AND_I1]]
+//
+uint16x2_t test_pand_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pand_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_por_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[OR_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_por_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[OR_I1]]
+//
+int8x4_t test_por_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_por_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_por_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[OR_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_por_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[OR_I1]]
+//
+uint8x4_t test_por_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_por_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_por_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[OR_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_por_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[OR_I1]]
+//
+int16x2_t test_por_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_por_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_por_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[OR_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_por_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[OR_I1]]
+//
+uint16x2_t test_por_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_por_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pxor_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[XOR_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pxor_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[XOR_I1]]
+//
+int8x4_t test_pxor_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pxor_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pxor_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[XOR_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pxor_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[XOR_I1]]
+//
+uint8x4_t test_pxor_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pxor_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pxor_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[XOR_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pxor_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[XOR_I1]]
+//
+int16x2_t test_pxor_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pxor_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pxor_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i32 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i32 [[XOR_I1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pxor_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i32 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i32 [[XOR_I1]]
+//
+uint16x2_t test_pxor_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pxor_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pnot_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[NOT_I:%.*]] = xor <4 x i8> [[TMP0]], splat (i8 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[NOT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pnot_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[NOT_I:%.*]] = xor <4 x i8> [[TMP0]], splat (i8 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[NOT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+int8x4_t test_pnot_i8x4(int8x4_t a) {
+ return __riscv_pnot_i8x4(a);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pnot_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[NOT_I:%.*]] = xor <4 x i8> [[TMP0]], splat (i8 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[NOT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pnot_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[NOT_I:%.*]] = xor <4 x i8> [[TMP0]], splat (i8 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[NOT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+uint8x4_t test_pnot_u8x4(uint8x4_t a) {
+ return __riscv_pnot_u8x4(a);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pnot_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[NOT_I:%.*]] = xor <2 x i16> [[TMP0]], splat (i16 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[NOT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pnot_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[NOT_I:%.*]] = xor <2 x i16> [[TMP0]], splat (i16 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[NOT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+int16x2_t test_pnot_i16x2(int16x2_t a) {
+ return __riscv_pnot_i16x2(a);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pnot_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[NOT_I:%.*]] = xor <2 x i16> [[TMP0]], splat (i16 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[NOT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pnot_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[NOT_I:%.*]] = xor <2 x i16> [[TMP0]], splat (i16 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[NOT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+uint16x2_t test_pnot_u16x2(uint16x2_t a) {
+ return __riscv_pnot_u16x2(a);
}
-// RV32-LABEL: define dso_local i64 @test_psub_u16x4(
+/* Packed Logical Operations (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_pand_i8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[AND_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV64-LABEL: define dso_local i64 @test_pand_i8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[AND_I1]]
//
-uint16x4_t test_psub_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_psub_u16x4(a, b);
+int8x8_t test_pand_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pand_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV32-LABEL: define dso_local i64 @test_pand_u8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[AND_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV64-LABEL: define dso_local i64 @test_pand_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[AND_I1]]
//
-int32x2_t test_psub_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_psub_i32x2(a, b);
+uint8x8_t test_pand_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pand_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV32-LABEL: define dso_local i64 @test_pand_i16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[AND_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV64-LABEL: define dso_local i64 @test_pand_i16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[AND_I1]]
//
-uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_psub_u32x2(a, b);
+int16x4_t test_pand_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pand_i16x4(a, b);
}
-/* 32-bit Packed Shifts */
-
-// RV32-LABEL: define dso_local i32 @test_psll_s_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pand_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[AND_I1]]
//
-// RV64-LABEL: define dso_local i32 @test_psll_s_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pand_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[AND_I1]]
//
-int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
- return __riscv_psll_s_i8x4(a, shamt);
+uint16x4_t test_pand_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pand_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psll_s_u8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pand_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[AND_I1]]
//
-// RV64-LABEL: define dso_local i32 @test_psll_s_u8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pand_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[AND_I1]]
//
-uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
- return __riscv_psll_s_u8x4(a, shamt);
+int32x2_t test_pand_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pand_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psll_s_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pand_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[AND_I1]]
//
-// RV64-LABEL: define dso_local i32 @test_psll_s_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pand_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[AND_I1:%.*]] = and i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[AND_I1]]
//
-int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
- return __riscv_psll_s_i16x2(a, shamt);
+uint32x2_t test_pand_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pand_u32x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psll_s_u16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_por_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[OR_I1]]
//
-// RV64-LABEL: define dso_local i32 @test_psll_s_u16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_por_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[OR_I1]]
//
-uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
- return __riscv_psll_s_u16x2(a, shamt);
+int8x8_t test_por_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_por_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psra_s_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_por_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[OR_I1]]
//
-// RV64-LABEL: define dso_local i32 @test_psra_s_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_por_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[OR_I1]]
//
-int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
- return __riscv_psra_s_i8x4(a, shamt);
+uint8x8_t test_por_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_por_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psrl_s_u8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_por_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[OR_I1]]
//
-// RV64-LABEL: define dso_local i32 @test_psrl_s_u8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_por_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[OR_I1]]
//
-uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
- return __riscv_psrl_s_u8x4(a, shamt);
+int16x4_t test_por_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_por_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psra_s_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_por_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[OR_I1]]
//
-// RV64-LABEL: define dso_local i32 @test_psra_s_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_por_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[OR_I1]]
//
-int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
- return __riscv_psra_s_i16x2(a, shamt);
+uint16x4_t test_por_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_por_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psrl_s_u16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_por_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[OR_I1]]
//
-// RV64-LABEL: define dso_local i32 @test_psrl_s_u16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_por_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[OR_I1]]
//
-uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned shamt) {
- return __riscv_psrl_s_u16x2(a, shamt);
+int32x2_t test_por_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_por_i32x2(a, b);
}
-/* 64-bit Packed Shifts */
-
-// RV32-LABEL: define dso_local i64 @test_psll_s_i8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_por_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[OR_I1]]
+//
+// RV64-LABEL: define dso_local i64 @test_por_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[OR_I1:%.*]] = or i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[OR_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_i8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+uint32x2_t test_por_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_por_u32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pxor_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[XOR_I1]]
+//
+// RV64-LABEL: define dso_local i64 @test_pxor_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[XOR_I1]]
//
-int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned shamt) {
- return __riscv_psll_s_i8x8(a, shamt);
+int8x8_t test_pxor_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pxor_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_u8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pxor_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[XOR_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_u8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pxor_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[XOR_I1]]
//
-uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned shamt) {
- return __riscv_psll_s_u8x8(a, shamt);
+uint8x8_t test_pxor_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pxor_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pxor_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[XOR_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pxor_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[XOR_I1]]
//
-int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned shamt) {
- return __riscv_psll_s_i16x4(a, shamt);
+int16x4_t test_pxor_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pxor_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_u16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pxor_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[XOR_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_u16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pxor_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[XOR_I1]]
//
-uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned shamt) {
- return __riscv_psll_s_u16x4(a, shamt);
+uint16x4_t test_pxor_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pxor_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pxor_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[XOR_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pxor_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[XOR_I1]]
//
-int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned shamt) {
- return __riscv_psll_s_i32x2(a, shamt);
+int32x2_t test_pxor_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pxor_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_u32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pxor_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV32-NEXT: ret i64 [[XOR_I1]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_u32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pxor_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[XOR_I1:%.*]] = xor i64 [[A_COERCE]], [[B_COERCE]]
+// RV64-NEXT: ret i64 [[XOR_I1]]
//
-uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned shamt) {
- return __riscv_psll_s_u32x2(a, shamt);
+uint32x2_t test_pxor_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pxor_u32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psra_s_i8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pnot_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[TMP0]], splat (i8 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[NOT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_psra_s_i8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pnot_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[TMP0]], splat (i8 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[NOT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned shamt) {
- return __riscv_psra_s_i8x8(a, shamt);
+int8x8_t test_pnot_i8x8(int8x8_t a) {
+ return __riscv_pnot_i8x8(a);
}
-// RV32-LABEL: define dso_local i64 @test_psrl_s_u8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pnot_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[TMP0]], splat (i8 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[NOT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pnot_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[NOT_I:%.*]] = xor <8 x i8> [[TMP0]], splat (i8 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[NOT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned shamt) {
- return __riscv_psrl_s_u8x8(a, shamt);
+uint8x8_t test_pnot_u8x8(uint8x8_t a) {
+ return __riscv_pnot_u8x8(a);
}
-// RV32-LABEL: define dso_local i64 @test_psra_s_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pnot_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[TMP0]], splat (i16 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[NOT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_psra_s_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pnot_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[TMP0]], splat (i16 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[NOT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned shamt) {
- return __riscv_psra_s_i16x4(a, shamt);
+int16x4_t test_pnot_i16x4(int16x4_t a) {
+ return __riscv_pnot_i16x4(a);
}
-// RV32-LABEL: define dso_local i64 @test_psrl_s_u16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pnot_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[TMP0]], splat (i16 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[NOT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pnot_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[NOT_I:%.*]] = xor <4 x i16> [[TMP0]], splat (i16 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[NOT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned shamt) {
- return __riscv_psrl_s_u16x4(a, shamt);
+uint16x4_t test_pnot_u16x4(uint16x4_t a) {
+ return __riscv_pnot_u16x4(a);
}
-// RV32-LABEL: define dso_local i64 @test_psra_s_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pnot_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV32-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[TMP0]], splat (i32 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[NOT_I]] to i64
// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_psra_s_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pnot_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV64-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[TMP0]], splat (i32 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[NOT_I]] to i64
// RV64-NEXT: ret i64 [[TMP1]]
//
-int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned shamt) {
- return __riscv_psra_s_i32x2(a, shamt);
+int32x2_t test_pnot_i32x2(int32x2_t a) {
+ return __riscv_pnot_i32x2(a);
}
-// RV32-LABEL: define dso_local i64 @test_psrl_s_u32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pnot_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV32-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[TMP0]], splat (i32 -1)
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[NOT_I]] to i64
// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_psrl_s_u32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pnot_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV64-NEXT: [[NOT_I:%.*]] = xor <2 x i32> [[TMP0]], splat (i32 -1)
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[NOT_I]] to i64
// RV64-NEXT: ret i64 [[TMP1]]
//
-uint32x2_t test_psrl_s_u32x2(uint32x2_t a, unsigned shamt) {
- return __riscv_psrl_s_u32x2(a, shamt);
+uint32x2_t test_pnot_u32x2(uint32x2_t a) {
+ return __riscv_pnot_u32x2(a);
}
>From f8ccdff499e2d3bbb4470cfda7ea83b0e2710bb0 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Wed, 20 May 2026 10:00:04 +0800
Subject: [PATCH 09/19] [cross-project-tests][RISCV][P-ext] Add riscv_packed.h
intrinsic test
---
.../intrinsic-header-tests/riscv_packed.c | 1016 +++++++++++++++++
1 file changed, 1016 insertions(+)
create mode 100644 cross-project-tests/intrinsic-header-tests/riscv_packed.c
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed.c b/cross-project-tests/intrinsic-header-tests/riscv_packed.c
new file mode 100644
index 0000000000000..ba4973620c2c5
--- /dev/null
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed.c
@@ -0,0 +1,1016 @@
+// REQUIRES: riscv-registered-target
+// expected-no-diagnostics
+
+// RUN: %clang %s -O2 -S -o - --target=riscv32 \
+// RUN: -menable-experimental-extensions -march=rv32i_p0p21 -Werror \
+// RUN: | FileCheck %s --check-prefixes=CHECK,RV32
+// RUN: %clang %s -O2 -S -o - --target=riscv64 \
+// RUN: -menable-experimental-extensions -march=rv64i_p0p21 -Werror \
+// RUN: | FileCheck %s --check-prefixes=CHECK,RV64
+
+#include <riscv_packed.h>
+
+// CHECK-LABEL: test_pmv_s_u8x4:
+// CHECK: pmv.bs
+uint8x4_t test_pmv_s_u8x4(uint8_t x) { return __riscv_pmv_s_u8x4(x); }
+
+// CHECK-LABEL: test_pmv_s_i8x4:
+// CHECK: pmv.bs
+int8x4_t test_pmv_s_i8x4(int8_t x) { return __riscv_pmv_s_i8x4(x); }
+
+// CHECK-LABEL: test_pmv_s_u16x2:
+// CHECK: pmv.hs
+uint16x2_t test_pmv_s_u16x2(uint16_t x) { return __riscv_pmv_s_u16x2(x); }
+
+// CHECK-LABEL: test_pmv_s_i16x2:
+// CHECK: pmv.hs
+int16x2_t test_pmv_s_i16x2(int16_t x) { return __riscv_pmv_s_i16x2(x); }
+
+// TODO: On RV64, the 32-bit packed constant splat emits `lui`+`addi` instead
+// of `pli.b`/`pli.h` or `plui.h`.
+// CHECK-LABEL: test_pmv_s_u8x4_imm:
+// RV32: pli.b
+// RV64: lui
+uint8x4_t test_pmv_s_u8x4_imm(void) { return __riscv_pmv_s_u8x4(5); }
+
+// CHECK-LABEL: test_pmv_s_i8x4_imm:
+// RV32: pli.b
+// RV64: lui
+int8x4_t test_pmv_s_i8x4_imm(void) { return __riscv_pmv_s_i8x4(-3); }
+
+// CHECK-LABEL: test_pmv_s_u16x2_imm:
+// RV32: pli.h
+// RV64: lui
+uint16x2_t test_pmv_s_u16x2_imm(void) { return __riscv_pmv_s_u16x2(42); }
+
+// CHECK-LABEL: test_pmv_s_i16x2_imm:
+// RV32: pli.h
+// RV64: lui
+int16x2_t test_pmv_s_i16x2_imm(void) { return __riscv_pmv_s_i16x2(-5); }
+
+// CHECK-LABEL: test_pmv_s_u16x2_imm_hi:
+// RV32: plui.h
+// RV64: lui
+uint16x2_t test_pmv_s_u16x2_imm_hi(void) { return __riscv_pmv_s_u16x2(0x3600); }
+
+// CHECK-LABEL: test_pmv_s_i16x2_imm_hi:
+// RV32: plui.h
+// RV64: lui
+int16x2_t test_pmv_s_i16x2_imm_hi(void) { return __riscv_pmv_s_i16x2(0x3600); }
+
+// CHECK-LABEL: test_pmv_s_u8x8:
+// RV32: pmv.dbs
+// RV64: pmv.bs
+uint8x8_t test_pmv_s_u8x8(uint8_t x) { return __riscv_pmv_s_u8x8(x); }
+
+// CHECK-LABEL: test_pmv_s_i8x8:
+// RV32: pmv.dbs
+// RV64: pmv.bs
+int8x8_t test_pmv_s_i8x8(int8_t x) { return __riscv_pmv_s_i8x8(x); }
+
+// CHECK-LABEL: test_pmv_s_u16x4:
+// RV32: pmv.dhs
+// RV64: pmv.hs
+uint16x4_t test_pmv_s_u16x4(uint16_t x) { return __riscv_pmv_s_u16x4(x); }
+
+// CHECK-LABEL: test_pmv_s_i16x4:
+// RV32: pmv.dhs
+// RV64: pmv.hs
+int16x4_t test_pmv_s_i16x4(int16_t x) { return __riscv_pmv_s_i16x4(x); }
+
+// TODO: On RV32, the 32x2 variable splat emits a plain `mv` instead of
+// `padd.dws` with rs1_p=x0.
+// CHECK-LABEL: test_pmv_s_u32x2:
+// RV32: mv{{[[:space:]]}}
+// RV64: pmv.ws
+uint32x2_t test_pmv_s_u32x2(uint32_t x) { return __riscv_pmv_s_u32x2(x); }
+
+// CHECK-LABEL: test_pmv_s_i32x2:
+// RV32: mv{{[[:space:]]}}
+// RV64: pmv.ws
+int32x2_t test_pmv_s_i32x2(int32_t x) { return __riscv_pmv_s_i32x2(x); }
+
+// TODO: On RV32, the 64-bit packed constant splat emits two `pli.b`/`pli.h`/
+// `plui.h` instead of one `pli.db`/`pli.dh`/`plui.dh`.
+// CHECK-LABEL: test_pmv_s_u8x8_imm:
+// RV32-COUNT-2: pli.b
+// RV64: pli.b
+uint8x8_t test_pmv_s_u8x8_imm(void) { return __riscv_pmv_s_u8x8(5); }
+
+// CHECK-LABEL: test_pmv_s_i8x8_imm:
+// RV32-COUNT-2: pli.b
+// RV64: pli.b
+int8x8_t test_pmv_s_i8x8_imm(void) { return __riscv_pmv_s_i8x8(-3); }
+
+// CHECK-LABEL: test_pmv_s_u16x4_imm:
+// RV32-COUNT-2: pli.h
+// RV64: pli.h
+uint16x4_t test_pmv_s_u16x4_imm(void) { return __riscv_pmv_s_u16x4(42); }
+
+// CHECK-LABEL: test_pmv_s_i16x4_imm:
+// RV32-COUNT-2: pli.h
+// RV64: pli.h
+int16x4_t test_pmv_s_i16x4_imm(void) { return __riscv_pmv_s_i16x4(-5); }
+
+// CHECK-LABEL: test_pmv_s_u16x4_imm_hi:
+// RV32-COUNT-2: plui.h
+// RV64: plui.h
+uint16x4_t test_pmv_s_u16x4_imm_hi(void) { return __riscv_pmv_s_u16x4(0x3600); }
+
+// CHECK-LABEL: test_pmv_s_i16x4_imm_hi:
+// RV32-COUNT-2: plui.h
+// RV64: plui.h
+int16x4_t test_pmv_s_i16x4_imm_hi(void) { return __riscv_pmv_s_i16x4(0x3600); }
+
+// Note: On RV32, constants that fit `addi`'s 12-bit immediate fold to 2x `li`.
+// Larger constants follow `lui`+`addi`+`mv`; see `_imm_big` below.
+// CHECK-LABEL: test_pmv_s_u32x2_imm:
+// RV32-COUNT-2: li{{[[:space:]]}}
+// RV64: pli.w
+uint32x2_t test_pmv_s_u32x2_imm(void) { return __riscv_pmv_s_u32x2(42); }
+
+// CHECK-LABEL: test_pmv_s_i32x2_imm:
+// RV32-COUNT-2: li{{[[:space:]]}}
+// RV64: pli.w
+int32x2_t test_pmv_s_i32x2_imm(void) { return __riscv_pmv_s_i32x2(-5); }
+
+// CHECK-LABEL: test_pmv_s_u32x2_imm_big:
+// RV32: lui
+// RV32-NEXT: addi
+// RV32-NEXT: mv{{[[:space:]]}}
+// RV32-NEXT: ret
+uint32x2_t test_pmv_s_u32x2_imm_big(void) {
+ return __riscv_pmv_s_u32x2(0x12345);
+}
+
+// CHECK-LABEL: test_pmv_s_i32x2_imm_big:
+// RV32: lui
+// RV32-NEXT: addi
+// RV32-NEXT: mv{{[[:space:]]}}
+// RV32-NEXT: ret
+int32x2_t test_pmv_s_i32x2_imm_big(void) {
+ return __riscv_pmv_s_i32x2(0x12345);
+}
+
+// CHECK-LABEL: test_psll_s_u8x4:
+// CHECK: psll.bs
+uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned n) {
+ return __riscv_psll_s_u8x4(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i8x4:
+// CHECK: psll.bs
+int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned n) {
+ return __riscv_psll_s_i8x4(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u16x2:
+// CHECK: psll.hs
+uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned n) {
+ return __riscv_psll_s_u16x2(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i16x2:
+// CHECK: psll.hs
+int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned n) {
+ return __riscv_psll_s_i16x2(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u8x4:
+// CHECK: psrl.bs
+uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned n) {
+ return __riscv_psrl_s_u8x4(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u16x2:
+// CHECK: psrl.hs
+uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned n) {
+ return __riscv_psrl_s_u16x2(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i8x4:
+// CHECK: psra.bs
+int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned n) {
+ return __riscv_psra_s_i8x4(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i16x2:
+// CHECK: psra.hs
+int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned n) {
+ return __riscv_psra_s_i16x2(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u8x4_imm:
+// CHECK: pslli.b{{[[:space:]]+}}{{.*}}, 2
+uint8x4_t test_psll_s_u8x4_imm(uint8x4_t a) {
+ return __riscv_psll_s_u8x4(a, 2);
+}
+
+// CHECK-LABEL: test_psll_s_i8x4_imm:
+// CHECK: pslli.b{{[[:space:]]+}}{{.*}}, 3
+int8x4_t test_psll_s_i8x4_imm(int8x4_t a) { return __riscv_psll_s_i8x4(a, 3); }
+
+// CHECK-LABEL: test_psll_s_u16x2_imm:
+// CHECK: pslli.h{{[[:space:]]+}}{{.*}}, 5
+uint16x2_t test_psll_s_u16x2_imm(uint16x2_t a) {
+ return __riscv_psll_s_u16x2(a, 5);
+}
+
+// CHECK-LABEL: test_psll_s_i16x2_imm:
+// CHECK: pslli.h{{[[:space:]]+}}{{.*}}, 7
+int16x2_t test_psll_s_i16x2_imm(int16x2_t a) {
+ return __riscv_psll_s_i16x2(a, 7);
+}
+
+// CHECK-LABEL: test_psrl_s_u8x4_imm:
+// CHECK: psrli.b{{[[:space:]]+}}{{.*}}, 2
+uint8x4_t test_psrl_s_u8x4_imm(uint8x4_t a) {
+ return __riscv_psrl_s_u8x4(a, 2);
+}
+
+// CHECK-LABEL: test_psrl_s_u16x2_imm:
+// CHECK: psrli.h{{[[:space:]]+}}{{.*}}, 3
+uint16x2_t test_psrl_s_u16x2_imm(uint16x2_t a) {
+ return __riscv_psrl_s_u16x2(a, 3);
+}
+
+// CHECK-LABEL: test_psra_s_i8x4_imm:
+// CHECK: psrai.b{{[[:space:]]+}}{{.*}}, 4
+int8x4_t test_psra_s_i8x4_imm(int8x4_t a) { return __riscv_psra_s_i8x4(a, 4); }
+
+// CHECK-LABEL: test_psra_s_i16x2_imm:
+// CHECK: psrai.h{{[[:space:]]+}}{{.*}}, 5
+int16x2_t test_psra_s_i16x2_imm(int16x2_t a) {
+ return __riscv_psra_s_i16x2(a, 5);
+}
+
+// CHECK-LABEL: test_psll_s_u8x8:
+// RV32: psll.dbs
+// RV64: psll.bs
+uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned n) {
+ return __riscv_psll_s_u8x8(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i8x8:
+// RV32: psll.dbs
+// RV64: psll.bs
+int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned n) {
+ return __riscv_psll_s_i8x8(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u16x4:
+// RV32: psll.dhs
+// RV64: psll.hs
+uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned n) {
+ return __riscv_psll_s_u16x4(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i16x4:
+// RV32: psll.dhs
+// RV64: psll.hs
+int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned n) {
+ return __riscv_psll_s_i16x4(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u32x2:
+// RV32: psll.dws
+// RV64: psll.ws
+uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned n) {
+ return __riscv_psll_s_u32x2(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i32x2:
+// RV32: psll.dws
+// RV64: psll.ws
+int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned n) {
+ return __riscv_psll_s_i32x2(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u8x8:
+// RV32: psrl.dbs
+// RV64: psrl.bs
+uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned n) {
+ return __riscv_psrl_s_u8x8(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u16x4:
+// RV32: psrl.dhs
+// RV64: psrl.hs
+uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned n) {
+ return __riscv_psrl_s_u16x4(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u32x2:
+// RV32: psrl.dws
+// RV64: psrl.ws
+uint32x2_t test_psrl_s_u32x2(uint32x2_t a, unsigned n) {
+ return __riscv_psrl_s_u32x2(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i8x8:
+// RV32: psra.dbs
+// RV64: psra.bs
+int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned n) {
+ return __riscv_psra_s_i8x8(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i16x4:
+// RV32: psra.dhs
+// RV64: psra.hs
+int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned n) {
+ return __riscv_psra_s_i16x4(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i32x2:
+// RV32: psra.dws
+// RV64: psra.ws
+int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned n) {
+ return __riscv_psra_s_i32x2(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u8x8_imm:
+// RV32: pslli.db{{[[:space:]]+}}{{.*}}, 2
+// RV64: pslli.b{{[[:space:]]+}}{{.*}}, 2
+uint8x8_t test_psll_s_u8x8_imm(uint8x8_t a) {
+ return __riscv_psll_s_u8x8(a, 2);
+}
+
+// CHECK-LABEL: test_psll_s_i8x8_imm:
+// RV32: pslli.db{{[[:space:]]+}}{{.*}}, 3
+// RV64: pslli.b{{[[:space:]]+}}{{.*}}, 3
+int8x8_t test_psll_s_i8x8_imm(int8x8_t a) { return __riscv_psll_s_i8x8(a, 3); }
+
+// CHECK-LABEL: test_psll_s_u16x4_imm:
+// RV32: pslli.dh{{[[:space:]]+}}{{.*}}, 4
+// RV64: pslli.h{{[[:space:]]+}}{{.*}}, 4
+uint16x4_t test_psll_s_u16x4_imm(uint16x4_t a) {
+ return __riscv_psll_s_u16x4(a, 4);
+}
+
+// CHECK-LABEL: test_psll_s_i16x4_imm:
+// RV32: pslli.dh{{[[:space:]]+}}{{.*}}, 5
+// RV64: pslli.h{{[[:space:]]+}}{{.*}}, 5
+int16x4_t test_psll_s_i16x4_imm(int16x4_t a) {
+ return __riscv_psll_s_i16x4(a, 5);
+}
+
+// CHECK-LABEL: test_psll_s_u32x2_imm:
+// RV32: pslli.dw{{[[:space:]]+}}{{.*}}, 7
+// RV64: pslli.w{{[[:space:]]+}}{{.*}}, 7
+uint32x2_t test_psll_s_u32x2_imm(uint32x2_t a) {
+ return __riscv_psll_s_u32x2(a, 7);
+}
+
+// CHECK-LABEL: test_psll_s_i32x2_imm:
+// RV32: pslli.dw{{[[:space:]]+}}{{.*}}, 9
+// RV64: pslli.w{{[[:space:]]+}}{{.*}}, 9
+int32x2_t test_psll_s_i32x2_imm(int32x2_t a) {
+ return __riscv_psll_s_i32x2(a, 9);
+}
+
+// CHECK-LABEL: test_psrl_s_u8x8_imm:
+// RV32: psrli.db{{[[:space:]]+}}{{.*}}, 2
+// RV64: psrli.b{{[[:space:]]+}}{{.*}}, 2
+uint8x8_t test_psrl_s_u8x8_imm(uint8x8_t a) {
+ return __riscv_psrl_s_u8x8(a, 2);
+}
+
+// CHECK-LABEL: test_psrl_s_u16x4_imm:
+// RV32: psrli.dh{{[[:space:]]+}}{{.*}}, 3
+// RV64: psrli.h{{[[:space:]]+}}{{.*}}, 3
+uint16x4_t test_psrl_s_u16x4_imm(uint16x4_t a) {
+ return __riscv_psrl_s_u16x4(a, 3);
+}
+
+// CHECK-LABEL: test_psrl_s_u32x2_imm:
+// RV32: psrli.dw{{[[:space:]]+}}{{.*}}, 5
+// RV64: psrli.w{{[[:space:]]+}}{{.*}}, 5
+uint32x2_t test_psrl_s_u32x2_imm(uint32x2_t a) {
+ return __riscv_psrl_s_u32x2(a, 5);
+}
+
+// CHECK-LABEL: test_psra_s_i8x8_imm:
+// RV32: psrai.db{{[[:space:]]+}}{{.*}}, 4
+// RV64: psrai.b{{[[:space:]]+}}{{.*}}, 4
+int8x8_t test_psra_s_i8x8_imm(int8x8_t a) { return __riscv_psra_s_i8x8(a, 4); }
+
+// CHECK-LABEL: test_psra_s_i16x4_imm:
+// RV32: psrai.dh{{[[:space:]]+}}{{.*}}, 5
+// RV64: psrai.h{{[[:space:]]+}}{{.*}}, 5
+int16x4_t test_psra_s_i16x4_imm(int16x4_t a) {
+ return __riscv_psra_s_i16x4(a, 5);
+}
+
+// CHECK-LABEL: test_psra_s_i32x2_imm:
+// RV32: psrai.dw{{[[:space:]]+}}{{.*}}, 11
+// RV64: psrai.w{{[[:space:]]+}}{{.*}}, 11
+int32x2_t test_psra_s_i32x2_imm(int32x2_t a) {
+ return __riscv_psra_s_i32x2(a, 11);
+}
+
+// CHECK-LABEL: test_padd_s_u8x4:
+// CHECK: padd.bs
+uint8x4_t test_padd_s_u8x4(uint8x4_t a, uint8_t b) {
+ return __riscv_padd_s_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i8x4:
+// CHECK: padd.bs
+int8x4_t test_padd_s_i8x4(int8x4_t a, int8_t b) {
+ return __riscv_padd_s_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_u16x2:
+// CHECK: padd.hs
+uint16x2_t test_padd_s_u16x2(uint16x2_t a, uint16_t b) {
+ return __riscv_padd_s_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i16x2:
+// CHECK: padd.hs
+int16x2_t test_padd_s_i16x2(int16x2_t a, int16_t b) {
+ return __riscv_padd_s_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_u8x8:
+// RV32: padd.dbs
+// RV64: padd.bs
+uint8x8_t test_padd_s_u8x8(uint8x8_t a, uint8_t b) {
+ return __riscv_padd_s_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i8x8:
+// RV32: padd.dbs
+// RV64: padd.bs
+int8x8_t test_padd_s_i8x8(int8x8_t a, int8_t b) {
+ return __riscv_padd_s_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_u16x4:
+// RV32: padd.dhs
+// RV64: padd.hs
+uint16x4_t test_padd_s_u16x4(uint16x4_t a, uint16_t b) {
+ return __riscv_padd_s_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i16x4:
+// RV32: padd.dhs
+// RV64: padd.hs
+int16x4_t test_padd_s_i16x4(int16x4_t a, int16_t b) {
+ return __riscv_padd_s_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_u32x2:
+// RV32: padd.dws
+// RV64: padd.ws
+uint32x2_t test_padd_s_u32x2(uint32x2_t a, uint32_t b) {
+ return __riscv_padd_s_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i32x2:
+// RV32: padd.dws
+// RV64: padd.ws
+int32x2_t test_padd_s_i32x2(int32x2_t a, int32_t b) {
+ return __riscv_padd_s_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_i8x4:
+// CHECK: padd.b
+int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_padd_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_u8x4:
+// CHECK: padd.b
+uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_padd_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_i16x2:
+// CHECK: padd.h
+int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_padd_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_u16x2:
+// CHECK: padd.h
+uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_padd_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_psub_i8x4:
+// CHECK: psub.b
+int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_psub_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_psub_u8x4:
+// CHECK: psub.b
+uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_psub_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_psub_i16x2:
+// CHECK: psub.h
+int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psub_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_psub_u16x2:
+// CHECK: psub.h
+uint16x2_t test_psub_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psub_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pneg_i8x4:
+// CHECK: pneg.b
+int8x4_t test_pneg_i8x4(int8x4_t a) { return __riscv_pneg_i8x4(a); }
+
+// CHECK-LABEL: test_pneg_i16x2:
+// CHECK: pneg.h
+int16x2_t test_pneg_i16x2(int16x2_t a) { return __riscv_pneg_i16x2(a); }
+
+// CHECK-LABEL: test_padd_i8x8:
+// RV32: padd.db
+// RV64: padd.b
+int8x8_t test_padd_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_padd_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_padd_u8x8:
+// RV32: padd.db
+// RV64: padd.b
+uint8x8_t test_padd_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_padd_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_padd_i16x4:
+// RV32: padd.dh
+// RV64: padd.h
+int16x4_t test_padd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_padd_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_u16x4:
+// RV32: padd.dh
+// RV64: padd.h
+uint16x4_t test_padd_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_padd_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_i32x2:
+// RV32: padd.dw
+// RV64: padd.w
+int32x2_t test_padd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_padd_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_u32x2:
+// RV32: padd.dw
+// RV64: padd.w
+uint32x2_t test_padd_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_padd_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_psub_i8x8:
+// RV32: psub.db
+// RV64: psub.b
+int8x8_t test_psub_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_psub_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_psub_u8x8:
+// RV32: psub.db
+// RV64: psub.b
+uint8x8_t test_psub_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_psub_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_psub_i16x4:
+// RV32: psub.dh
+// RV64: psub.h
+int16x4_t test_psub_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psub_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_psub_u16x4:
+// RV32: psub.dh
+// RV64: psub.h
+uint16x4_t test_psub_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psub_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_psub_i32x2:
+// RV32: psub.dw
+// RV64: psub.w
+int32x2_t test_psub_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psub_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_psub_u32x2:
+// RV32: psub.dw
+// RV64: psub.w
+uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psub_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_pneg_i8x8:
+// RV32: pneg.db
+// RV64: pneg.b
+int8x8_t test_pneg_i8x8(int8x8_t a) { return __riscv_pneg_i8x8(a); }
+
+// CHECK-LABEL: test_pneg_i16x4:
+// RV32: pneg.dh
+// RV64: pneg.h
+int16x4_t test_pneg_i16x4(int16x4_t a) { return __riscv_pneg_i16x4(a); }
+
+// CHECK-LABEL: test_pneg_i32x2:
+// RV32: pneg.dw
+// RV64: pneg.w
+int32x2_t test_pneg_i32x2(int32x2_t a) { return __riscv_pneg_i32x2(a); }
+
+// CHECK-LABEL: test_pmin_i8x4:
+// CHECK: pmin.b
+int8x4_t test_pmin_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pmin_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_pmin_i16x2:
+// CHECK: pmin.h
+int16x2_t test_pmin_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pmin_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_pminu_u8x4:
+// CHECK: pminu.b
+uint8x4_t test_pminu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pminu_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_pminu_u16x2:
+// CHECK: pminu.h
+uint16x2_t test_pminu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pminu_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pmax_i8x4:
+// CHECK: pmax.b
+int8x4_t test_pmax_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pmax_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_pmax_i16x2:
+// CHECK: pmax.h
+int16x2_t test_pmax_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pmax_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_pmaxu_u8x4:
+// CHECK: pmaxu.b
+uint8x4_t test_pmaxu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pmaxu_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_pmaxu_u16x2:
+// CHECK: pmaxu.h
+uint16x2_t test_pmaxu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pmaxu_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pmin_i8x8:
+// RV32: pmin.db
+// RV64: pmin.b
+int8x8_t test_pmin_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pmin_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_pmin_i16x4:
+// RV32: pmin.dh
+// RV64: pmin.h
+int16x4_t test_pmin_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pmin_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pmin_i32x2:
+// RV32: pmin.dw
+// RV64: pmin.w
+int32x2_t test_pmin_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pmin_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_pminu_u8x8:
+// RV32: pminu.db
+// RV64: pminu.b
+uint8x8_t test_pminu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pminu_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_pminu_u16x4:
+// RV32: pminu.dh
+// RV64: pminu.h
+uint16x4_t test_pminu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pminu_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_pminu_u32x2:
+// RV32: pminu.dw
+// RV64: pminu.w
+uint32x2_t test_pminu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pminu_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_pmax_i8x8:
+// RV32: pmax.db
+// RV64: pmax.b
+int8x8_t test_pmax_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pmax_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_pmax_i16x4:
+// RV32: pmax.dh
+// RV64: pmax.h
+int16x4_t test_pmax_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pmax_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pmax_i32x2:
+// RV32: pmax.dw
+// RV64: pmax.w
+int32x2_t test_pmax_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pmax_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_pmaxu_u8x8:
+// RV32: pmaxu.db
+// RV64: pmaxu.b
+uint8x8_t test_pmaxu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pmaxu_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_pmaxu_u16x4:
+// RV32: pmaxu.dh
+// RV64: pmaxu.h
+uint16x4_t test_pmaxu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pmaxu_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_pmaxu_u32x2:
+// RV32: pmaxu.dw
+// RV64: pmaxu.w
+uint32x2_t test_pmaxu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pmaxu_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_pand_i8x4:
+// CHECK: and{{[[:space:]]}}
+int8x4_t test_pand_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pand_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_pand_u8x4:
+// CHECK: and{{[[:space:]]}}
+uint8x4_t test_pand_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pand_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_pand_i16x2:
+// CHECK: and{{[[:space:]]}}
+int16x2_t test_pand_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pand_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_pand_u16x2:
+// CHECK: and{{[[:space:]]}}
+uint16x2_t test_pand_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pand_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_por_i8x4:
+// CHECK: or{{[[:space:]]}}
+int8x4_t test_por_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_por_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_por_u8x4:
+// CHECK: or{{[[:space:]]}}
+uint8x4_t test_por_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_por_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_por_i16x2:
+// CHECK: or{{[[:space:]]}}
+int16x2_t test_por_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_por_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_por_u16x2:
+// CHECK: or{{[[:space:]]}}
+uint16x2_t test_por_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_por_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pxor_i8x4:
+// CHECK: xor{{[[:space:]]}}
+int8x4_t test_pxor_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pxor_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_pxor_u8x4:
+// CHECK: xor{{[[:space:]]}}
+uint8x4_t test_pxor_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pxor_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_pxor_i16x2:
+// CHECK: xor{{[[:space:]]}}
+int16x2_t test_pxor_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pxor_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_pxor_u16x2:
+// CHECK: xor{{[[:space:]]}}
+uint16x2_t test_pxor_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pxor_u16x2(a, b);
+}
+
+// TODO: On RV64, vector `NOT` on a 32-bit packed type emits `li`+`xor`
+// instead of the `not` alias.
+// CHECK-LABEL: test_pnot_i8x4:
+// RV32: not{{[[:space:]]}}
+// RV64: li
+// RV64-NEXT: xor{{[[:space:]]}}
+int8x4_t test_pnot_i8x4(int8x4_t a) { return __riscv_pnot_i8x4(a); }
+
+// CHECK-LABEL: test_pnot_u8x4:
+// RV32: not{{[[:space:]]}}
+// RV64: li
+// RV64-NEXT: xor{{[[:space:]]}}
+uint8x4_t test_pnot_u8x4(uint8x4_t a) { return __riscv_pnot_u8x4(a); }
+
+// CHECK-LABEL: test_pnot_i16x2:
+// RV32: not{{[[:space:]]}}
+// RV64: li
+// RV64-NEXT: xor{{[[:space:]]}}
+int16x2_t test_pnot_i16x2(int16x2_t a) { return __riscv_pnot_i16x2(a); }
+
+// CHECK-LABEL: test_pnot_u16x2:
+// RV32: not{{[[:space:]]}}
+// RV64: li
+// RV64-NEXT: xor{{[[:space:]]}}
+uint16x2_t test_pnot_u16x2(uint16x2_t a) { return __riscv_pnot_u16x2(a); }
+
+// CHECK-LABEL: test_pand_i8x8:
+// RV32-COUNT-2: and{{[[:space:]]}}
+// RV64: and{{[[:space:]]}}
+int8x8_t test_pand_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pand_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_pand_u8x8:
+// RV32-COUNT-2: and{{[[:space:]]}}
+// RV64: and{{[[:space:]]}}
+uint8x8_t test_pand_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pand_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_pand_i16x4:
+// RV32-COUNT-2: and{{[[:space:]]}}
+// RV64: and{{[[:space:]]}}
+int16x4_t test_pand_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pand_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pand_u16x4:
+// RV32-COUNT-2: and{{[[:space:]]}}
+// RV64: and{{[[:space:]]}}
+uint16x4_t test_pand_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pand_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_pand_i32x2:
+// RV32-COUNT-2: and{{[[:space:]]}}
+// RV64: and{{[[:space:]]}}
+int32x2_t test_pand_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pand_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_pand_u32x2:
+// RV32-COUNT-2: and{{[[:space:]]}}
+// RV64: and{{[[:space:]]}}
+uint32x2_t test_pand_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pand_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_por_i8x8:
+// RV32-COUNT-2: or{{[[:space:]]}}
+// RV64: or{{[[:space:]]}}
+int8x8_t test_por_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_por_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_por_u8x8:
+// RV32-COUNT-2: or{{[[:space:]]}}
+// RV64: or{{[[:space:]]}}
+uint8x8_t test_por_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_por_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_por_i16x4:
+// RV32-COUNT-2: or{{[[:space:]]}}
+// RV64: or{{[[:space:]]}}
+int16x4_t test_por_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_por_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_por_u16x4:
+// RV32-COUNT-2: or{{[[:space:]]}}
+// RV64: or{{[[:space:]]}}
+uint16x4_t test_por_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_por_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_por_i32x2:
+// RV32-COUNT-2: or{{[[:space:]]}}
+// RV64: or{{[[:space:]]}}
+int32x2_t test_por_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_por_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_por_u32x2:
+// RV32-COUNT-2: or{{[[:space:]]}}
+// RV64: or{{[[:space:]]}}
+uint32x2_t test_por_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_por_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_pxor_i8x8:
+// RV32-COUNT-2: xor{{[[:space:]]}}
+// RV64: xor{{[[:space:]]}}
+int8x8_t test_pxor_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pxor_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_pxor_u8x8:
+// RV32-COUNT-2: xor{{[[:space:]]}}
+// RV64: xor{{[[:space:]]}}
+uint8x8_t test_pxor_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pxor_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_pxor_i16x4:
+// RV32-COUNT-2: xor{{[[:space:]]}}
+// RV64: xor{{[[:space:]]}}
+int16x4_t test_pxor_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pxor_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pxor_u16x4:
+// RV32-COUNT-2: xor{{[[:space:]]}}
+// RV64: xor{{[[:space:]]}}
+uint16x4_t test_pxor_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pxor_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_pxor_i32x2:
+// RV32-COUNT-2: xor{{[[:space:]]}}
+// RV64: xor{{[[:space:]]}}
+int32x2_t test_pxor_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pxor_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_pxor_u32x2:
+// RV32-COUNT-2: xor{{[[:space:]]}}
+// RV64: xor{{[[:space:]]}}
+uint32x2_t test_pxor_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pxor_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_pnot_i8x8:
+// RV32-COUNT-2: not{{[[:space:]]}}
+// RV64: not{{[[:space:]]}}
+int8x8_t test_pnot_i8x8(int8x8_t a) { return __riscv_pnot_i8x8(a); }
+
+// CHECK-LABEL: test_pnot_u8x8:
+// RV32-COUNT-2: not{{[[:space:]]}}
+// RV64: not{{[[:space:]]}}
+uint8x8_t test_pnot_u8x8(uint8x8_t a) { return __riscv_pnot_u8x8(a); }
+
+// CHECK-LABEL: test_pnot_i16x4:
+// RV32-COUNT-2: not{{[[:space:]]}}
+// RV64: not{{[[:space:]]}}
+int16x4_t test_pnot_i16x4(int16x4_t a) { return __riscv_pnot_i16x4(a); }
+
+// CHECK-LABEL: test_pnot_u16x4:
+// RV32-COUNT-2: not{{[[:space:]]}}
+// RV64: not{{[[:space:]]}}
+uint16x4_t test_pnot_u16x4(uint16x4_t a) { return __riscv_pnot_u16x4(a); }
+
+// CHECK-LABEL: test_pnot_i32x2:
+// RV32-COUNT-2: not{{[[:space:]]}}
+// RV64: not{{[[:space:]]}}
+int32x2_t test_pnot_i32x2(int32x2_t a) { return __riscv_pnot_i32x2(a); }
+
+// CHECK-LABEL: test_pnot_u32x2:
+// RV32-COUNT-2: not{{[[:space:]]}}
+// RV64: not{{[[:space:]]}}
+uint32x2_t test_pnot_u32x2(uint32x2_t a) { return __riscv_pnot_u32x2(a); }
>From 0d38de0290789cf3d281171e2675b4e6d42947e4 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Thu, 21 May 2026 17:23:38 +0800
Subject: [PATCH 10/19] drop __aligned__
---
clang/lib/Headers/riscv_packed.h | 22 +++++++++++-----------
1 file changed, 11 insertions(+), 11 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed.h b/clang/lib/Headers/riscv_packed.h
index c7605de340faa..6805a94d941d7 100644
--- a/clang/lib/Headers/riscv_packed.h
+++ b/clang/lib/Headers/riscv_packed.h
@@ -18,17 +18,17 @@ extern "C" {
/* Packed SIMD Types */
-typedef int8_t int8x4_t __attribute__((__vector_size__(4), __aligned__(4)));
-typedef uint8_t uint8x4_t __attribute__((__vector_size__(4), __aligned__(4)));
-typedef int16_t int16x2_t __attribute__((__vector_size__(4), __aligned__(4)));
-typedef uint16_t uint16x2_t __attribute__((__vector_size__(4), __aligned__(4)));
-
-typedef int8_t int8x8_t __attribute__((__vector_size__(8), __aligned__(8)));
-typedef uint8_t uint8x8_t __attribute__((__vector_size__(8), __aligned__(8)));
-typedef int16_t int16x4_t __attribute__((__vector_size__(8), __aligned__(8)));
-typedef uint16_t uint16x4_t __attribute__((__vector_size__(8), __aligned__(8)));
-typedef int32_t int32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
-typedef uint32_t uint32x2_t __attribute__((__vector_size__(8), __aligned__(8)));
+typedef int8_t int8x4_t __attribute__((__vector_size__(4)));
+typedef uint8_t uint8x4_t __attribute__((__vector_size__(4)));
+typedef int16_t int16x2_t __attribute__((__vector_size__(4)));
+typedef uint16_t uint16x2_t __attribute__((__vector_size__(4)));
+
+typedef int8_t int8x8_t __attribute__((__vector_size__(8)));
+typedef uint8_t uint8x8_t __attribute__((__vector_size__(8)));
+typedef int16_t int16x4_t __attribute__((__vector_size__(8)));
+typedef uint16_t uint16x4_t __attribute__((__vector_size__(8)));
+typedef int32_t int32x2_t __attribute__((__vector_size__(8)));
+typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
#define __DEFAULT_FN_ATTRS __attribute__((__always_inline__, __nodebug__))
>From a8d42aa15411a9959bd42d6a57a13ff0dfeec155 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Wed, 27 May 2026 10:22:03 +0800
Subject: [PATCH 11/19] RV64 32-bit pnot now emits not
---
.../intrinsic-header-tests/riscv_packed.c | 18 ++++--------------
1 file changed, 4 insertions(+), 14 deletions(-)
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed.c b/cross-project-tests/intrinsic-header-tests/riscv_packed.c
index ba4973620c2c5..1afbb3eac007d 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed.c
@@ -833,30 +833,20 @@ uint16x2_t test_pxor_u16x2(uint16x2_t a, uint16x2_t b) {
return __riscv_pxor_u16x2(a, b);
}
-// TODO: On RV64, vector `NOT` on a 32-bit packed type emits `li`+`xor`
-// instead of the `not` alias.
// CHECK-LABEL: test_pnot_i8x4:
-// RV32: not{{[[:space:]]}}
-// RV64: li
-// RV64-NEXT: xor{{[[:space:]]}}
+// CHECK: not{{[[:space:]]}}
int8x4_t test_pnot_i8x4(int8x4_t a) { return __riscv_pnot_i8x4(a); }
// CHECK-LABEL: test_pnot_u8x4:
-// RV32: not{{[[:space:]]}}
-// RV64: li
-// RV64-NEXT: xor{{[[:space:]]}}
+// CHECK: not{{[[:space:]]}}
uint8x4_t test_pnot_u8x4(uint8x4_t a) { return __riscv_pnot_u8x4(a); }
// CHECK-LABEL: test_pnot_i16x2:
-// RV32: not{{[[:space:]]}}
-// RV64: li
-// RV64-NEXT: xor{{[[:space:]]}}
+// CHECK: not{{[[:space:]]}}
int16x2_t test_pnot_i16x2(int16x2_t a) { return __riscv_pnot_i16x2(a); }
// CHECK-LABEL: test_pnot_u16x2:
-// RV32: not{{[[:space:]]}}
-// RV64: li
-// RV64-NEXT: xor{{[[:space:]]}}
+// CHECK: not{{[[:space:]]}}
uint16x2_t test_pnot_u16x2(uint16x2_t a) { return __riscv_pnot_u16x2(a); }
// CHECK-LABEL: test_pand_i8x8:
>From 3d20ce00633d89113d8db2d3f148fb40b3b54da9 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Wed, 27 May 2026 19:42:04 +0800
Subject: [PATCH 12/19] add -verify -Wextra to RUN lines
---
cross-project-tests/intrinsic-header-tests/riscv_packed.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed.c b/cross-project-tests/intrinsic-header-tests/riscv_packed.c
index 1afbb3eac007d..6afc1b2dce869 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed.c
@@ -2,10 +2,10 @@
// expected-no-diagnostics
// RUN: %clang %s -O2 -S -o - --target=riscv32 \
-// RUN: -menable-experimental-extensions -march=rv32i_p0p21 -Werror \
+// RUN: -menable-experimental-extensions -march=rv32i_p0p21 -Werror -Wextra -Xclang -verify \
// RUN: | FileCheck %s --check-prefixes=CHECK,RV32
// RUN: %clang %s -O2 -S -o - --target=riscv64 \
-// RUN: -menable-experimental-extensions -march=rv64i_p0p21 -Werror \
+// RUN: -menable-experimental-extensions -march=rv64i_p0p21 -Werror -Wextra -Xclang -verify \
// RUN: | FileCheck %s --check-prefixes=CHECK,RV64
#include <riscv_packed.h>
>From 163f936df5c21ad9c27b48198b2cf1067fb04e0b Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 29 May 2026 10:13:37 +0800
Subject: [PATCH 13/19] chore: trigger PR update
>From 043782a9475beea0317f514643914be7bb7a825a Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Jun 2026 16:44:00 +0800
Subject: [PATCH 14/19] rename riscv_packed.h to riscv_packed_simd.h
---
clang/lib/Headers/CMakeLists.txt | 2 +-
clang/lib/Headers/{riscv_packed.h => riscv_packed_simd.h} | 8 ++++----
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 2 +-
.../{riscv_packed.c => riscv_packed_simd.c} | 2 +-
4 files changed, 7 insertions(+), 7 deletions(-)
rename clang/lib/Headers/{riscv_packed.h => riscv_packed_simd.h} (98%)
rename cross-project-tests/intrinsic-header-tests/{riscv_packed.c => riscv_packed_simd.c} (99%)
diff --git a/clang/lib/Headers/CMakeLists.txt b/clang/lib/Headers/CMakeLists.txt
index 59cd039747ae6..439f2725168ba 100644
--- a/clang/lib/Headers/CMakeLists.txt
+++ b/clang/lib/Headers/CMakeLists.txt
@@ -140,7 +140,7 @@ set(riscv_files
riscv_corev_alu.h
riscv_mips.h
riscv_nds.h
- riscv_packed.h
+ riscv_packed_simd.h
sifive_vector.h
)
diff --git a/clang/lib/Headers/riscv_packed.h b/clang/lib/Headers/riscv_packed_simd.h
similarity index 98%
rename from clang/lib/Headers/riscv_packed.h
rename to clang/lib/Headers/riscv_packed_simd.h
index 6805a94d941d7..a25fb8a696f1d 100644
--- a/clang/lib/Headers/riscv_packed.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -1,4 +1,4 @@
-/*===---- riscv_packed.h - RISC-V P intrinsics -----------------------------===
+/*===---- riscv_packed_simd.h - RISC-V P intrinsics ------------------------===
*
* Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
* See https://llvm.org/LICENSE.txt for license information.
@@ -7,8 +7,8 @@
*===-----------------------------------------------------------------------===
*/
-#ifndef __RISCV_PACKED_H
-#define __RISCV_PACKED_H
+#ifndef __RISCV_PACKED_SIMD_H
+#define __RISCV_PACKED_SIMD_H
#include <stdint.h>
@@ -246,4 +246,4 @@ __packed_unary_op(pnot_u32x2, uint32x2_t, ~)
}
#endif
-#endif /* __RISCV_PACKED_H */
+#endif /* __RISCV_PACKED_SIMD_H */
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index e79c98dfd93a5..c84eb6ac2e270 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -6,7 +6,7 @@
// RUN: -disable-O0-optnone -emit-llvm -o - %s \
// RUN: | opt -S -passes=sroa,instcombine | FileCheck %s --check-prefix=RV64
-#include <riscv_packed.h>
+#include <riscv_packed_simd.h>
/* Packed Splat (32-bit) */
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
similarity index 99%
rename from cross-project-tests/intrinsic-header-tests/riscv_packed.c
rename to cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
index 6afc1b2dce869..f5a31a900403a 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
@@ -8,7 +8,7 @@
// RUN: -menable-experimental-extensions -march=rv64i_p0p21 -Werror -Wextra -Xclang -verify \
// RUN: | FileCheck %s --check-prefixes=CHECK,RV64
-#include <riscv_packed.h>
+#include <riscv_packed_simd.h>
// CHECK-LABEL: test_pmv_s_u8x4:
// CHECK: pmv.bs
>From 64ab5bfed70db7f26630ac7ed2e579399d1e900b Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 2 Jun 2026 17:59:13 +0800
Subject: [PATCH 15/19] [Clang][RISCV] packed saturating add/sub intrinsics
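Add header wrappers for psadd/psaddu/pssub/pssubu, covering the 32-bit
types (i8x4/i16x2/u8x4/u16x2) and the 64-bit types
(i8x8/i16x4/i32x2/u8x8/u16x4/u32x2). The wrappers map to
__builtin_elementwise_add_sat / __builtin_elementwise_sub_sat, and the
__packed_minmax helper macro is renamed to __packed_binary_builtin now
that it is shared with the min/max wrappers. Backend instructions, isel
patterns and rvp-simd-*.ll coverage already exist; this patch adds the
header intrinsics for this section of the intrinsic spec, together with
the clang CodeGen (IR) test and the cross-project mnemonic test.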
---
clang/lib/Headers/riscv_packed_simd.h | 68 ++-
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 444 ++++++++++++++++++
.../riscv_packed_simd.c | 132 ++++++
3 files changed, 622 insertions(+), 22 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h
index a25fb8a696f1d..0fc1de13c17cc 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -67,7 +67,7 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
return op __rs1; \
}
-#define __packed_minmax(name, ty, builtin) \
+#define __packed_binary_builtin(name, ty, builtin) \
static __inline__ ty __DEFAULT_FN_ATTRS \
__riscv_##name(ty __rs1, ty __rs2) { \
return builtin(__rs1, __rs2); \
@@ -160,29 +160,53 @@ __packed_unary_op(pneg_i8x8, int8x8_t, -)
__packed_unary_op(pneg_i16x4, int16x4_t, -)
__packed_unary_op(pneg_i32x2, int32x2_t, -)
+/* Packed Saturating Addition and Subtraction (32-bit) */
+__packed_binary_builtin(psadd_i8x4, int8x4_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(psadd_i16x2, int16x2_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(psaddu_u8x4, uint8x4_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(psaddu_u16x2, uint16x2_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(pssub_i8x4, int8x4_t, __builtin_elementwise_sub_sat)
+__packed_binary_builtin(pssub_i16x2, int16x2_t, __builtin_elementwise_sub_sat)
+__packed_binary_builtin(pssubu_u8x4, uint8x4_t, __builtin_elementwise_sub_sat)
+__packed_binary_builtin(pssubu_u16x2, uint16x2_t, __builtin_elementwise_sub_sat)
+
+/* Packed Saturating Addition and Subtraction (64-bit) */
+__packed_binary_builtin(psadd_i8x8, int8x8_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(psadd_i16x4, int16x4_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(psadd_i32x2, int32x2_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(psaddu_u8x8, uint8x8_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(psaddu_u16x4, uint16x4_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(psaddu_u32x2, uint32x2_t, __builtin_elementwise_add_sat)
+__packed_binary_builtin(pssub_i8x8, int8x8_t, __builtin_elementwise_sub_sat)
+__packed_binary_builtin(pssub_i16x4, int16x4_t, __builtin_elementwise_sub_sat)
+__packed_binary_builtin(pssub_i32x2, int32x2_t, __builtin_elementwise_sub_sat)
+__packed_binary_builtin(pssubu_u8x8, uint8x8_t, __builtin_elementwise_sub_sat)
+__packed_binary_builtin(pssubu_u16x4, uint16x4_t, __builtin_elementwise_sub_sat)
+__packed_binary_builtin(pssubu_u32x2, uint32x2_t, __builtin_elementwise_sub_sat)
+
/* Packed Minimum and Maximum (32-bit) */
-__packed_minmax(pmin_i8x4, int8x4_t, __builtin_elementwise_min)
-__packed_minmax(pmin_i16x2, int16x2_t, __builtin_elementwise_min)
-__packed_minmax(pminu_u8x4, uint8x4_t, __builtin_elementwise_min)
-__packed_minmax(pminu_u16x2, uint16x2_t, __builtin_elementwise_min)
-__packed_minmax(pmax_i8x4, int8x4_t, __builtin_elementwise_max)
-__packed_minmax(pmax_i16x2, int16x2_t, __builtin_elementwise_max)
-__packed_minmax(pmaxu_u8x4, uint8x4_t, __builtin_elementwise_max)
-__packed_minmax(pmaxu_u16x2, uint16x2_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmin_i8x4, int8x4_t, __builtin_elementwise_min)
+__packed_binary_builtin(pmin_i16x2, int16x2_t, __builtin_elementwise_min)
+__packed_binary_builtin(pminu_u8x4, uint8x4_t, __builtin_elementwise_min)
+__packed_binary_builtin(pminu_u16x2, uint16x2_t, __builtin_elementwise_min)
+__packed_binary_builtin(pmax_i8x4, int8x4_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmax_i16x2, int16x2_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmaxu_u8x4, uint8x4_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmaxu_u16x2, uint16x2_t, __builtin_elementwise_max)
/* Packed Minimum and Maximum (64-bit) */
-__packed_minmax(pmin_i8x8, int8x8_t, __builtin_elementwise_min)
-__packed_minmax(pmin_i16x4, int16x4_t, __builtin_elementwise_min)
-__packed_minmax(pmin_i32x2, int32x2_t, __builtin_elementwise_min)
-__packed_minmax(pminu_u8x8, uint8x8_t, __builtin_elementwise_min)
-__packed_minmax(pminu_u16x4, uint16x4_t, __builtin_elementwise_min)
-__packed_minmax(pminu_u32x2, uint32x2_t, __builtin_elementwise_min)
-__packed_minmax(pmax_i8x8, int8x8_t, __builtin_elementwise_max)
-__packed_minmax(pmax_i16x4, int16x4_t, __builtin_elementwise_max)
-__packed_minmax(pmax_i32x2, int32x2_t, __builtin_elementwise_max)
-__packed_minmax(pmaxu_u8x8, uint8x8_t, __builtin_elementwise_max)
-__packed_minmax(pmaxu_u16x4, uint16x4_t, __builtin_elementwise_max)
-__packed_minmax(pmaxu_u32x2, uint32x2_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmin_i8x8, int8x8_t, __builtin_elementwise_min)
+__packed_binary_builtin(pmin_i16x4, int16x4_t, __builtin_elementwise_min)
+__packed_binary_builtin(pmin_i32x2, int32x2_t, __builtin_elementwise_min)
+__packed_binary_builtin(pminu_u8x8, uint8x8_t, __builtin_elementwise_min)
+__packed_binary_builtin(pminu_u16x4, uint16x4_t, __builtin_elementwise_min)
+__packed_binary_builtin(pminu_u32x2, uint32x2_t, __builtin_elementwise_min)
+__packed_binary_builtin(pmax_i8x8, int8x8_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmax_i16x4, int16x4_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmax_i32x2, int32x2_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmaxu_u8x8, uint8x8_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmaxu_u16x4, uint16x4_t, __builtin_elementwise_max)
+__packed_binary_builtin(pmaxu_u32x2, uint32x2_t, __builtin_elementwise_max)
/* Packed Logical Operations (32-bit) */
__packed_binary_op(pand_i8x4, int8x4_t, &)
@@ -239,7 +263,7 @@ __packed_unary_op(pnot_u32x2, uint32x2_t, ~)
#undef __packed_scalar_binary_op
#undef __packed_binary_op
#undef __packed_unary_op
-#undef __packed_minmax
+#undef __packed_binary_builtin
#undef __DEFAULT_FN_ATTRS
#if defined(__cplusplus)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index c84eb6ac2e270..4f64b7dd34c55 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -1556,6 +1556,450 @@ int32x2_t test_pneg_i32x2(int32x2_t a) {
return __riscv_pneg_i32x2(a);
}
+/* Packed Saturating Addition and Subtraction (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_psadd_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psadd_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int8x4_t test_psadd_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_psadd_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psadd_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psadd_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t test_psadd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psadd_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psaddu_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psaddu_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint8x4_t test_psaddu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_psaddu_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psaddu_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psaddu_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint16x2_t test_psaddu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psaddu_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pssub_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pssub_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int8x4_t test_pssub_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pssub_i8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pssub_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pssub_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t test_pssub_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pssub_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pssubu_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pssubu_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint8x4_t test_pssubu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pssubu_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pssubu_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pssubu_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint16x2_t test_pssubu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pssubu_u16x2(a, b);
+}
+
+/* Packed Saturating Addition and Subtraction (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_psadd_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psadd_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int8x8_t test_psadd_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_psadd_i8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psadd_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psadd_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_psadd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psadd_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psadd_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psadd_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int32x2_t test_psadd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psadd_i32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psaddu_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psaddu_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint8x8_t test_psaddu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_psaddu_u8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psaddu_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psaddu_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_psaddu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psaddu_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psaddu_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psaddu_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint32x2_t test_psaddu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psaddu_u32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pssub_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pssub_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int8x8_t test_pssub_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pssub_i8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pssub_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pssub_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_pssub_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pssub_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pssub_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pssub_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int32x2_t test_pssub_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pssub_i32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pssubu_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pssubu_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint8x8_t test_pssubu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pssubu_u8x8(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pssubu_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pssubu_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_pssubu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pssubu_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pssubu_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pssubu_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint32x2_t test_pssubu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pssubu_u32x2(a, b);
+}
+
/* Packed Minimum and Maximum (32-bit) */
// RV32-LABEL: define dso_local i32 @test_pmin_i8x4(
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
index f5a31a900403a..98de0ffa650b7 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
@@ -629,6 +629,138 @@ int16x4_t test_pneg_i16x4(int16x4_t a) { return __riscv_pneg_i16x4(a); }
// RV64: pneg.w
int32x2_t test_pneg_i32x2(int32x2_t a) { return __riscv_pneg_i32x2(a); }
+// CHECK-LABEL: test_psadd_i8x4:
+// CHECK: psadd.b
+int8x4_t test_psadd_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_psadd_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_psadd_i16x2:
+// CHECK: psadd.h
+int16x2_t test_psadd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psadd_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_psaddu_u8x4:
+// CHECK: psaddu.b
+uint8x4_t test_psaddu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_psaddu_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_psaddu_u16x2:
+// CHECK: psaddu.h
+uint16x2_t test_psaddu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psaddu_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pssub_i8x4:
+// CHECK: pssub.b
+int8x4_t test_pssub_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pssub_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_pssub_i16x2:
+// CHECK: pssub.h
+int16x2_t test_pssub_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pssub_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_pssubu_u8x4:
+// CHECK: pssubu.b
+uint8x4_t test_pssubu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pssubu_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_pssubu_u16x2:
+// CHECK: pssubu.h
+uint16x2_t test_pssubu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pssubu_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_psadd_i8x8:
+// RV32: psadd.db
+// RV64: psadd.b
+int8x8_t test_psadd_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_psadd_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_psadd_i16x4:
+// RV32: psadd.dh
+// RV64: psadd.h
+int16x4_t test_psadd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psadd_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_psadd_i32x2:
+// RV32: psadd.dw
+// RV64: psadd.w
+int32x2_t test_psadd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psadd_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_psaddu_u8x8:
+// RV32: psaddu.db
+// RV64: psaddu.b
+uint8x8_t test_psaddu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_psaddu_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_psaddu_u16x4:
+// RV32: psaddu.dh
+// RV64: psaddu.h
+uint16x4_t test_psaddu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psaddu_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_psaddu_u32x2:
+// RV32: psaddu.dw
+// RV64: psaddu.w
+uint32x2_t test_psaddu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psaddu_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_pssub_i8x8:
+// RV32: pssub.db
+// RV64: pssub.b
+int8x8_t test_pssub_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pssub_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_pssub_i16x4:
+// RV32: pssub.dh
+// RV64: pssub.h
+int16x4_t test_pssub_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pssub_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pssub_i32x2:
+// RV32: pssub.dw
+// RV64: pssub.w
+int32x2_t test_pssub_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pssub_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_pssubu_u8x8:
+// RV32: pssubu.db
+// RV64: pssubu.b
+uint8x8_t test_pssubu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pssubu_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_pssubu_u16x4:
+// RV32: pssubu.dh
+// RV64: pssubu.h
+uint16x4_t test_pssubu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pssubu_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_pssubu_u32x2:
+// RV32: pssubu.dw
+// RV64: pssubu.w
+uint32x2_t test_pssubu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pssubu_u32x2(a, b);
+}
+
// CHECK-LABEL: test_pmin_i8x4:
// CHECK: pmin.b
int8x4_t test_pmin_i8x4(int8x4_t a, int8x4_t b) {
>From ff6d88e6c140d9dcca60dd93c09480859a41d5ca Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Wed, 3 Jun 2026 15:45:59 +0800
Subject: [PATCH 16/19] [Clang][RISCV] packed shift-add intrinsics
Add psh1add / pssh1sadd header wrappers, the Clang CodeGen checks, and the cross-project test.
---
clang/lib/Headers/riscv_packed_simd.h | 32 +++
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 220 ++++++++++++++++++
.../riscv_packed_simd.c | 60 +++++
3 files changed, 312 insertions(+)
diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h
index 0fc1de13c17cc..e7c0e31daffce 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -73,6 +73,23 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
return builtin(__rs1, __rs2); \
}
+#define __packed_sh1add(name, ty) \
+ static __inline__ ty __DEFAULT_FN_ATTRS \
+ __riscv_##name(ty __rs1, ty __rs2) { \
+ return (__rs1 << 1) + __rs2; \
+ }
+
+/* TODO: switch to sadd_sat(__builtin_elementwise_shl_sat(a, 1), b) once a
+ * generic elementwise shl_sat builtin exists. Until then, sadd_sat(a, a)
+ * stands in for shl_sat(a, 1): the two are equivalent for signed types, and
+ * the backend's saturating_shl1 PatFrags matches both shapes. */
+#define __packed_sh1sadd(name, ty) \
+ static __inline__ ty __DEFAULT_FN_ATTRS \
+ __riscv_##name(ty __rs1, ty __rs2) { \
+ return __builtin_elementwise_add_sat( \
+ __builtin_elementwise_add_sat(__rs1, __rs1), __rs2); \
+ }
+
/* Packed Splat (32-bit) */
__packed_splat(pmv_s_u8x4, uint8x4_t, uint8_t, __packed_splat4)
__packed_splat(pmv_s_i8x4, int8x4_t, int8_t, __packed_splat4)
@@ -184,6 +201,19 @@ __packed_binary_builtin(pssubu_u8x8, uint8x8_t, __builtin_elementwise_sub_sat)
__packed_binary_builtin(pssubu_u16x4, uint16x4_t, __builtin_elementwise_sub_sat)
__packed_binary_builtin(pssubu_u32x2, uint32x2_t, __builtin_elementwise_sub_sat)
+/* Packed Shift-Add (32-bit) */
+__packed_sh1add(psh1add_i16x2, int16x2_t)
+__packed_sh1add(psh1add_u16x2, uint16x2_t)
+__packed_sh1sadd(pssh1sadd_i16x2, int16x2_t)
+
+/* Packed Shift-Add (64-bit) */
+__packed_sh1add(psh1add_i16x4, int16x4_t)
+__packed_sh1add(psh1add_u16x4, uint16x4_t)
+__packed_sh1add(psh1add_i32x2, int32x2_t)
+__packed_sh1add(psh1add_u32x2, uint32x2_t)
+__packed_sh1sadd(pssh1sadd_i16x4, int16x4_t)
+__packed_sh1sadd(pssh1sadd_i32x2, int32x2_t)
+
/* Packed Minimum and Maximum (32-bit) */
__packed_binary_builtin(pmin_i8x4, int8x4_t, __builtin_elementwise_min)
__packed_binary_builtin(pmin_i16x2, int16x2_t, __builtin_elementwise_min)
@@ -264,6 +294,8 @@ __packed_unary_op(pnot_u32x2, uint32x2_t, ~)
#undef __packed_binary_op
#undef __packed_unary_op
#undef __packed_binary_builtin
+#undef __packed_sh1add
+#undef __packed_sh1sadd
#undef __DEFAULT_FN_ATTRS
#if defined(__cplusplus)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index 4f64b7dd34c55..b7bd0458ca297 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -2000,6 +2000,226 @@ uint32x2_t test_pssubu_u32x2(uint32x2_t a, uint32x2_t b) {
return __riscv_pssubu_u32x2(a, b);
}
+/* Packed Shift-Add (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_psh1add_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psh1add_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t test_psh1add_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psh1add_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psh1add_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psh1add_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint16x2_t test_psh1add_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psh1add_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pssh1sadd_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP0]])
+// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ELT_SAT_I]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT3_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pssh1sadd_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP0]])
+// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ELT_SAT_I]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT3_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t test_pssh1sadd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pssh1sadd_i16x2(a, b);
+}
+
+/* Packed Shift-Add (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_psh1add_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psh1add_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_psh1add_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psh1add_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psh1add_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psh1add_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_psh1add_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psh1add_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psh1add_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psh1add_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int32x2_t test_psh1add_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psh1add_i32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psh1add_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psh1add_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint32x2_t test_psh1add_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psh1add_u32x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pssh1sadd_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP0]])
+// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[ELT_SAT_I]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT3_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pssh1sadd_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP0]])
+// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[ELT_SAT_I]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT3_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_pssh1sadd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pssh1sadd_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pssh1sadd_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP0]])
+// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[ELT_SAT_I]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT3_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_pssh1sadd_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP0]])
+// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[ELT_SAT_I]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT3_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int32x2_t test_pssh1sadd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pssh1sadd_i32x2(a, b);
+}
+
/* Packed Minimum and Maximum (32-bit) */
// RV32-LABEL: define dso_local i32 @test_pmin_i8x4(
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
index 98de0ffa650b7..6a01dcfa35219 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
@@ -761,6 +761,66 @@ uint32x2_t test_pssubu_u32x2(uint32x2_t a, uint32x2_t b) {
return __riscv_pssubu_u32x2(a, b);
}
+// CHECK-LABEL: test_psh1add_i16x2:
+// CHECK: psh1add.h
+int16x2_t test_psh1add_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psh1add_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_psh1add_u16x2:
+// CHECK: psh1add.h
+uint16x2_t test_psh1add_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psh1add_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_pssh1sadd_i16x2:
+// CHECK: pssh1sadd.h
+int16x2_t test_pssh1sadd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pssh1sadd_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_psh1add_i16x4:
+// RV32: psh1add.dh
+// RV64: psh1add.h
+int16x4_t test_psh1add_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psh1add_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_psh1add_u16x4:
+// RV32: psh1add.dh
+// RV64: psh1add.h
+uint16x4_t test_psh1add_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psh1add_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_psh1add_i32x2:
+// RV32: psh1add.dw
+// RV64: psh1add.w
+int32x2_t test_psh1add_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psh1add_i32x2(a, b);
+}
+
+// CHECK-LABEL: test_psh1add_u32x2:
+// RV32: psh1add.dw
+// RV64: psh1add.w
+uint32x2_t test_psh1add_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psh1add_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_pssh1sadd_i16x4:
+// RV32: pssh1sadd.dh
+// RV64: pssh1sadd.h
+int16x4_t test_pssh1sadd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pssh1sadd_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_pssh1sadd_i32x2:
+// RV32: pssh1sadd.dw
+// RV64: pssh1sadd.w
+int32x2_t test_pssh1sadd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pssh1sadd_i32x2(a, b);
+}
+
// CHECK-LABEL: test_pmin_i8x4:
// CHECK: pmin.b
int8x4_t test_pmin_i8x4(int8x4_t a, int8x4_t b) {
>From 8e0291e1b199f1c80bd583155c971ab9a3dd3425 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 9 Jun 2026 08:20:31 +0800
Subject: [PATCH 17/19] reorder sections to match latest spec
---
clang/lib/Headers/riscv_packed_simd.h | 88 +-
clang/test/CodeGen/RISCV/rvp-intrinsics.c | 2778 ++++++++---------
.../riscv_packed_simd.c | 644 ++--
3 files changed, 1755 insertions(+), 1755 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h
index e7c0e31daffce..1f4f33c5fafa1 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -104,50 +104,6 @@ __packed_splat(pmv_s_i16x4, int16x4_t, int16_t, __packed_splat4)
__packed_splat(pmv_s_u32x2, uint32x2_t, uint32_t, __packed_splat2)
__packed_splat(pmv_s_i32x2, int32x2_t, int32_t, __packed_splat2)
-/* Packed Shifts (32-bit) */
-__packed_shift8(psll_s_u8x4, uint8x4_t, <<)
-__packed_shift8(psll_s_i8x4, int8x4_t, <<)
-__packed_shift16(psll_s_u16x2, uint16x2_t, <<)
-__packed_shift16(psll_s_i16x2, int16x2_t, <<)
-__packed_shift8(psrl_s_u8x4, uint8x4_t, >>)
-__packed_shift16(psrl_s_u16x2, uint16x2_t, >>)
-__packed_shift8(psra_s_i8x4, int8x4_t, >>)
-__packed_shift16(psra_s_i16x2, int16x2_t, >>)
-
-/* Packed Shifts (64-bit) */
-__packed_shift8(psll_s_u8x8, uint8x8_t, <<)
-__packed_shift8(psll_s_i8x8, int8x8_t, <<)
-__packed_shift16(psll_s_u16x4, uint16x4_t, <<)
-__packed_shift16(psll_s_i16x4, int16x4_t, <<)
-__packed_shift32(psll_s_u32x2, uint32x2_t, <<)
-__packed_shift32(psll_s_i32x2, int32x2_t, <<)
-__packed_shift8(psrl_s_u8x8, uint8x8_t, >>)
-__packed_shift16(psrl_s_u16x4, uint16x4_t, >>)
-__packed_shift32(psrl_s_u32x2, uint32x2_t, >>)
-__packed_shift8(psra_s_i8x8, int8x8_t, >>)
-__packed_shift16(psra_s_i16x4, int16x4_t, >>)
-__packed_shift32(psra_s_i32x2, int32x2_t, >>)
-
-/* Packed Addition with Scalar (32-bit) */
-__packed_scalar_binary_op(padd_s_u8x4, uint8x4_t, uint8_t, +, __packed_splat4)
-__packed_scalar_binary_op(padd_s_i8x4, int8x4_t, int8_t, +, __packed_splat4)
-__packed_scalar_binary_op(padd_s_u16x2, uint16x2_t, uint16_t, +,
- __packed_splat2)
-__packed_scalar_binary_op(padd_s_i16x2, int16x2_t, int16_t, +,
- __packed_splat2)
-
-/* Packed Addition with Scalar (64-bit) */
-__packed_scalar_binary_op(padd_s_u8x8, uint8x8_t, uint8_t, +, __packed_splat8)
-__packed_scalar_binary_op(padd_s_i8x8, int8x8_t, int8_t, +, __packed_splat8)
-__packed_scalar_binary_op(padd_s_u16x4, uint16x4_t, uint16_t, +,
- __packed_splat4)
-__packed_scalar_binary_op(padd_s_i16x4, int16x4_t, int16_t, +,
- __packed_splat4)
-__packed_scalar_binary_op(padd_s_u32x2, uint32x2_t, uint32_t, +,
- __packed_splat2)
-__packed_scalar_binary_op(padd_s_i32x2, int32x2_t, int32_t, +,
- __packed_splat2)
-
/* Packed Addition and Subtraction (32-bit) */
__packed_binary_op(padd_i8x4, int8x4_t, +)
__packed_binary_op(padd_u8x4, uint8x4_t, +)
@@ -177,6 +133,26 @@ __packed_unary_op(pneg_i8x8, int8x8_t, -)
__packed_unary_op(pneg_i16x4, int16x4_t, -)
__packed_unary_op(pneg_i32x2, int32x2_t, -)
+/* Packed Addition with Scalar (32-bit) */
+__packed_scalar_binary_op(padd_s_u8x4, uint8x4_t, uint8_t, +, __packed_splat4)
+__packed_scalar_binary_op(padd_s_i8x4, int8x4_t, int8_t, +, __packed_splat4)
+__packed_scalar_binary_op(padd_s_u16x2, uint16x2_t, uint16_t, +,
+ __packed_splat2)
+__packed_scalar_binary_op(padd_s_i16x2, int16x2_t, int16_t, +,
+ __packed_splat2)
+
+/* Packed Addition with Scalar (64-bit) */
+__packed_scalar_binary_op(padd_s_u8x8, uint8x8_t, uint8_t, +, __packed_splat8)
+__packed_scalar_binary_op(padd_s_i8x8, int8x8_t, int8_t, +, __packed_splat8)
+__packed_scalar_binary_op(padd_s_u16x4, uint16x4_t, uint16_t, +,
+ __packed_splat4)
+__packed_scalar_binary_op(padd_s_i16x4, int16x4_t, int16_t, +,
+ __packed_splat4)
+__packed_scalar_binary_op(padd_s_u32x2, uint32x2_t, uint32_t, +,
+ __packed_splat2)
+__packed_scalar_binary_op(padd_s_i32x2, int32x2_t, int32_t, +,
+ __packed_splat2)
+
/* Packed Saturating Addition and Subtraction (32-bit) */
__packed_binary_builtin(psadd_i8x4, int8x4_t, __builtin_elementwise_add_sat)
__packed_binary_builtin(psadd_i16x2, int16x2_t, __builtin_elementwise_add_sat)
@@ -238,6 +214,30 @@ __packed_binary_builtin(pmaxu_u8x8, uint8x8_t, __builtin_elementwise_max)
__packed_binary_builtin(pmaxu_u16x4, uint16x4_t, __builtin_elementwise_max)
__packed_binary_builtin(pmaxu_u32x2, uint32x2_t, __builtin_elementwise_max)
+/* Packed Shifts (32-bit) */
+__packed_shift8(psll_s_u8x4, uint8x4_t, <<)
+__packed_shift8(psll_s_i8x4, int8x4_t, <<)
+__packed_shift16(psll_s_u16x2, uint16x2_t, <<)
+__packed_shift16(psll_s_i16x2, int16x2_t, <<)
+__packed_shift8(psrl_s_u8x4, uint8x4_t, >>)
+__packed_shift16(psrl_s_u16x2, uint16x2_t, >>)
+__packed_shift8(psra_s_i8x4, int8x4_t, >>)
+__packed_shift16(psra_s_i16x2, int16x2_t, >>)
+
+/* Packed Shifts (64-bit) */
+__packed_shift8(psll_s_u8x8, uint8x8_t, <<)
+__packed_shift8(psll_s_i8x8, int8x8_t, <<)
+__packed_shift16(psll_s_u16x4, uint16x4_t, <<)
+__packed_shift16(psll_s_i16x4, int16x4_t, <<)
+__packed_shift32(psll_s_u32x2, uint32x2_t, <<)
+__packed_shift32(psll_s_i32x2, int32x2_t, <<)
+__packed_shift8(psrl_s_u8x8, uint8x8_t, >>)
+__packed_shift16(psrl_s_u16x4, uint16x4_t, >>)
+__packed_shift32(psrl_s_u32x2, uint32x2_t, >>)
+__packed_shift8(psra_s_i8x8, int8x8_t, >>)
+__packed_shift16(psra_s_i16x4, int16x4_t, >>)
+__packed_shift32(psra_s_i32x2, int32x2_t, >>)
+
/* Packed Logical Operations (32-bit) */
__packed_binary_op(pand_i8x4, int8x4_t, &)
__packed_binary_op(pand_u8x4, uint8x4_t, &)
diff --git a/clang/test/CodeGen/RISCV/rvp-intrinsics.c b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
index b7bd0458ca297..73db0bee19def 100644
--- a/clang/test/CodeGen/RISCV/rvp-intrinsics.c
+++ b/clang/test/CodeGen/RISCV/rvp-intrinsics.c
@@ -212,600 +212,588 @@ int32x2_t test_pmv_s_i32x2(int32_t x) {
return __riscv_pmv_s_i32x2(x);
}
-/* Packed Shifts (32-bit) */
+/* Packed Addition and Subtraction (32-bit) */
-// RV32-LABEL: define dso_local i32 @test_psll_s_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_padd_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psll_s_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_padd_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
//
-int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
- return __riscv_psll_s_i8x4(a, shamt);
+int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_padd_i8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psll_s_u8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psll_s_u8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
//
-uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
- return __riscv_psll_s_u8x4(a, shamt);
+uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_padd_u8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psll_s_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psll_s_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
//
-int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
- return __riscv_psll_s_i16x2(a, shamt);
+int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_padd_i16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psll_s_u16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psll_s_u16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
//
-uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
- return __riscv_psll_s_u16x2(a, shamt);
+uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_padd_u16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psra_s_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psra_s_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
//
-int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
- return __riscv_psra_s_i8x4(a, shamt);
+int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_psub_i8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psrl_s_u8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psrl_s_u8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
//
-uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
- return __riscv_psrl_s_u8x4(a, shamt);
+uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_psub_u8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psra_s_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psra_s_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
//
-int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
- return __riscv_psra_s_i16x2(a, shamt);
+int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psub_i16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psrl_s_u16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV32-NEXT: ret i32 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psrl_s_u16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
-// RV64-NEXT: ret i32 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
//
-uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned shamt) {
- return __riscv_psrl_s_u16x2(a, shamt);
+uint16x2_t test_psub_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psub_u16x2(a, b);
}
-/* Packed Shifts (64-bit) */
+// RV32-LABEL: define dso_local i32 @test_pneg_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pneg_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+int8x4_t test_pneg_i8x4(int8x4_t a) {
+ return __riscv_pneg_i8x4(a);
+}
-// RV32-LABEL: define dso_local i64 @test_psll_s_i8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_pneg_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_pneg_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+int16x2_t test_pneg_i16x2(int16x2_t a) {
+ return __riscv_pneg_i16x2(a);
+}
+
+/* Packed Addition and Subtraction (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_i8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned shamt) {
- return __riscv_psll_s_i8x8(a, shamt);
+int8x8_t test_padd_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_padd_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_u8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_u8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned shamt) {
- return __riscv_psll_s_u8x8(a, shamt);
+uint8x8_t test_padd_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_padd_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned shamt) {
- return __riscv_psll_s_i16x4(a, shamt);
+int16x4_t test_padd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_padd_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_u16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_u16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned shamt) {
- return __riscv_psll_s_u16x4(a, shamt);
+uint16x4_t test_padd_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_padd_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned shamt) {
- return __riscv_psll_s_i32x2(a, shamt);
+int32x2_t test_padd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_padd_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psll_s_u32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psll_s_u32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned shamt) {
- return __riscv_psll_s_u32x2(a, shamt);
+uint32x2_t test_padd_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_padd_u32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psra_s_i8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psra_s_i8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned shamt) {
- return __riscv_psra_s_i8x8(a, shamt);
+int8x8_t test_psub_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_psub_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psrl_s_u8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
-// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned shamt) {
- return __riscv_psrl_s_u8x8(a, shamt);
+uint8x8_t test_psub_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_psub_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psra_s_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psra_s_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned shamt) {
- return __riscv_psra_s_i16x4(a, shamt);
+int16x4_t test_psub_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psub_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psrl_s_u16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP4]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
-// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
-// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
-// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
-// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP4]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned shamt) {
- return __riscv_psrl_s_u16x4(a, shamt);
+uint16x4_t test_psub_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psub_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psra_s_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psra_s_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned shamt) {
- return __riscv_psra_s_i32x2(a, shamt);
+int32x2_t test_psub_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psub_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psrl_s_u32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psrl_s_u32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
-// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
-// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
-// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-uint32x2_t test_psrl_s_u32x2(uint32x2_t a, unsigned shamt) {
- return __riscv_psrl_s_u32x2(a, shamt);
+uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psub_u32x2(a, b);
}
-/* Packed Addition with Scalar (32-bit) */
-
-// RV32-LABEL: define dso_local i32 @test_padd_s_u8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pneg_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
-// RV32-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
-// RV32-NEXT: ret i32 [[TMP1]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_s_u8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pneg_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
-// RV64-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
-// RV64-NEXT: ret i32 [[TMP1]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-uint8x4_t test_padd_s_u8x4(uint8x4_t a, uint8_t b) {
- return __riscv_padd_s_u8x4(a, b);
+int8x8_t test_pneg_i8x8(int8x8_t a) {
+ return __riscv_pneg_i8x8(a);
}
-// RV32-LABEL: define dso_local i32 @test_padd_s_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pneg_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
-// RV32-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
-// RV32-NEXT: ret i32 [[TMP1]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_s_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pneg_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int16x4_t test_pneg_i16x4(int16x4_t a) {
+ return __riscv_pneg_i16x4(a);
+}
+
+// RV32-LABEL: define dso_local i64 @test_pneg_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i64 @test_pneg_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
+//
+int32x2_t test_pneg_i32x2(int32x2_t a) {
+ return __riscv_pneg_i32x2(a);
+}
+
+/* Packed Addition with Scalar (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_padd_s_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+// RV32-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_s_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef zeroext [[B:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+// RV64-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP1]]
+//
+uint8x4_t test_padd_s_u8x4(uint8x4_t a, uint8_t b) {
+ return __riscv_padd_s_u8x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_padd_s_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef signext [[B:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
+// RV32-NEXT: [[VECINIT4_I:%.*]] = shufflevector <4 x i8> [[VECINIT_I]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[VECINIT4_I]], [[TMP0]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP1]]
+//
+// RV64-LABEL: define dso_local i32 @test_padd_s_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i8 noundef signext [[B:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[VECINIT_I:%.*]] = insertelement <4 x i8> poison, i8 [[B]], i64 0
@@ -1012,1656 +1000,1668 @@ int32x2_t test_padd_s_i32x2(int32x2_t a, int32_t b) {
return __riscv_padd_s_i32x2(a, b);
}
-/* Packed Addition and Subtraction (32-bit) */
+/* Packed Saturating Addition and Subtraction (32-bit) */
-// RV32-LABEL: define dso_local i32 @test_padd_i8x4(
+// RV32-LABEL: define dso_local i32 @test_psadd_i8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_i8x4(
+// RV64-LABEL: define dso_local i32 @test_psadd_i8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
- return __riscv_padd_i8x4(a, b);
+int8x4_t test_psadd_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_psadd_i8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV32-LABEL: define dso_local i32 @test_psadd_i16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_u8x4(
+// RV64-LABEL: define dso_local i32 @test_psadd_i16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ADD_I]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint8x4_t test_padd_u8x4(uint8x4_t a, uint8x4_t b) {
- return __riscv_padd_u8x4(a, b);
+int16x2_t test_psadd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psadd_i16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV32-LABEL: define dso_local i32 @test_psaddu_u8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_i16x2(
+// RV64-LABEL: define dso_local i32 @test_psaddu_u8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int16x2_t test_padd_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_padd_i16x2(a, b);
+uint8x4_t test_psaddu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_psaddu_u8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV32-LABEL: define dso_local i32 @test_psaddu_u16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_padd_u16x2(
+// RV64-LABEL: define dso_local i32 @test_psaddu_u16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint16x2_t test_padd_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_padd_u16x2(a, b);
+uint16x2_t test_psaddu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psaddu_u16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV32-LABEL: define dso_local i32 @test_pssub_i8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psub_i8x4(
+// RV64-LABEL: define dso_local i32 @test_pssub_i8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int8x4_t test_psub_i8x4(int8x4_t a, int8x4_t b) {
- return __riscv_psub_i8x4(a, b);
+int8x4_t test_pssub_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pssub_i8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV32-LABEL: define dso_local i32 @test_pssub_i16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psub_u8x4(
+// RV64-LABEL: define dso_local i32 @test_pssub_i16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint8x4_t test_psub_u8x4(uint8x4_t a, uint8x4_t b) {
- return __riscv_psub_u8x4(a, b);
+int16x2_t test_pssub_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pssub_i16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV32-LABEL: define dso_local i32 @test_pssubu_u8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psub_i16x2(
+// RV64-LABEL: define dso_local i32 @test_pssubu_u8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int16x2_t test_psub_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_psub_i16x2(a, b);
+uint8x4_t test_pssubu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pssubu_u8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV32-LABEL: define dso_local i32 @test_pssubu_u16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psub_u16x2(
+// RV64-LABEL: define dso_local i32 @test_pssubu_u16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint16x2_t test_psub_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_psub_u16x2(a, b);
+uint16x2_t test_pssubu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pssubu_u16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_pneg_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+/* Packed Saturating Addition and Subtraction (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_psadd_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP0]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
-// RV32-NEXT: ret i32 [[TMP1]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_pneg_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psadd_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i8> zeroinitializer, [[TMP0]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i8> [[SUB_I]] to i32
-// RV64-NEXT: ret i32 [[TMP1]]
-//
-int8x4_t test_pneg_i8x4(int8x4_t a) {
- return __riscv_pneg_i8x4(a);
-}
-
-// RV32-LABEL: define dso_local i32 @test_pneg_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i16> zeroinitializer, [[TMP0]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
-// RV32-NEXT: ret i32 [[TMP1]]
-//
-// RV64-LABEL: define dso_local i32 @test_pneg_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i16> zeroinitializer, [[TMP0]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i16> [[SUB_I]] to i32
-// RV64-NEXT: ret i32 [[TMP1]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x2_t test_pneg_i16x2(int16x2_t a) {
- return __riscv_pneg_i16x2(a);
+int8x8_t test_psadd_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_psadd_i8x8(a, b);
}
-/* Packed Addition and Subtraction (64-bit) */
-
-// RV32-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV32-LABEL: define dso_local i64 @test_psadd_i16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_i8x8(
+// RV64-LABEL: define dso_local i64 @test_psadd_i16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_padd_i8x8(int8x8_t a, int8x8_t b) {
- return __riscv_padd_i8x8(a, b);
+int16x4_t test_psadd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psadd_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV32-LABEL: define dso_local i64 @test_psadd_i32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_u8x8(
+// RV64-LABEL: define dso_local i64 @test_psadd_i32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ADD_I:%.*]] = add <8 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ADD_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint8x8_t test_padd_u8x8(uint8x8_t a, uint8x8_t b) {
- return __riscv_padd_u8x8(a, b);
+int32x2_t test_psadd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psadd_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV32-LABEL: define dso_local i64 @test_psaddu_u8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_i16x4(
+// RV64-LABEL: define dso_local i64 @test_psaddu_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_padd_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_padd_i16x4(a, b);
+uint8x8_t test_psaddu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_psaddu_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV32-LABEL: define dso_local i64 @test_psaddu_u16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_u16x4(
+// RV64-LABEL: define dso_local i64 @test_psaddu_u16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint16x4_t test_padd_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_padd_u16x4(a, b);
+uint16x4_t test_psaddu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psaddu_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV32-LABEL: define dso_local i64 @test_psaddu_u32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_i32x2(
+// RV64-LABEL: define dso_local i64 @test_psaddu_u32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int32x2_t test_padd_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_padd_i32x2(a, b);
+uint32x2_t test_psaddu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psaddu_u32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV32-LABEL: define dso_local i64 @test_pssub_i8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_padd_u32x2(
+// RV64-LABEL: define dso_local i64 @test_pssub_i8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint32x2_t test_padd_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_padd_u32x2(a, b);
+int8x8_t test_pssub_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pssub_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV32-LABEL: define dso_local i64 @test_pssub_i16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_i8x8(
+// RV64-LABEL: define dso_local i64 @test_pssub_i16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_psub_i8x8(int8x8_t a, int8x8_t b) {
- return __riscv_psub_i8x8(a, b);
+int16x4_t test_pssub_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pssub_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV32-LABEL: define dso_local i64 @test_pssub_i32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_u8x8(
+// RV64-LABEL: define dso_local i64 @test_pssub_i32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint8x8_t test_psub_u8x8(uint8x8_t a, uint8x8_t b) {
- return __riscv_psub_u8x8(a, b);
+int32x2_t test_pssub_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pssub_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV32-LABEL: define dso_local i64 @test_pssubu_u8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_i16x4(
+// RV64-LABEL: define dso_local i64 @test_pssubu_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_psub_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_psub_i16x4(a, b);
+uint8x8_t test_pssubu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pssubu_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV32-LABEL: define dso_local i64 @test_pssubu_u16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_u16x4(
+// RV64-LABEL: define dso_local i64 @test_pssubu_u16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint16x4_t test_psub_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_psub_u16x4(a, b);
+uint16x4_t test_pssubu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pssubu_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV32-LABEL: define dso_local i64 @test_pssubu_u32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_i32x2(
+// RV64-LABEL: define dso_local i64 @test_pssubu_u32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int32x2_t test_psub_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_psub_i32x2(a, b);
+uint32x2_t test_pssubu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pssubu_u32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psub_u32x2(
+/* Packed Shift-Add (32-bit) */
+
+// RV32-LABEL: define dso_local i32 @test_psh1add_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psh1add_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t test_psh1add_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_psh1add_i16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_psh1add_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_psh1add_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+uint16x2_t test_psh1add_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_psh1add_u16x2(a, b);
+}
+
+// RV32-LABEL: define dso_local i32 @test_pssh1sadd_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP0]])
+// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ELT_SAT_I]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT3_I]] to i32
+// RV32-NEXT: ret i32 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i32 @test_pssh1sadd_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP0]])
+// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ELT_SAT_I]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT3_I]] to i32
+// RV64-NEXT: ret i32 [[TMP2]]
+//
+int16x2_t test_pssh1sadd_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pssh1sadd_i16x2(a, b);
+}
+
+/* Packed Shift-Add (64-bit) */
+
+// RV32-LABEL: define dso_local i64 @test_psh1add_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psh1add_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+int16x4_t test_psh1add_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_psh1add_i16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psh1add_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-NEXT: [[ENTRY:.*:]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
+//
+// RV64-LABEL: define dso_local i64 @test_psh1add_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-NEXT: [[ENTRY:.*:]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
+//
+uint16x4_t test_psh1add_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_psh1add_u16x4(a, b);
+}
+
+// RV32-LABEL: define dso_local i64 @test_psh1add_i32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psub_u32x2(
+// RV64-LABEL: define dso_local i64 @test_psh1add_i32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> [[TMP0]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint32x2_t test_psub_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_psub_u32x2(a, b);
+int32x2_t test_psh1add_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_psh1add_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pneg_i8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psh1add_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, [[TMP0]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
+// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pneg_i8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psh1add_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <8 x i8> zeroinitializer, [[TMP0]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <8 x i8> [[SUB_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
+// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_pneg_i8x8(int8x8_t a) {
- return __riscv_pneg_i8x8(a);
+uint32x2_t test_psh1add_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_psh1add_u32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pneg_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pssh1sadd_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, [[TMP0]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP0]])
+// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[ELT_SAT_I]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT3_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pneg_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pssh1sadd_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <4 x i16> zeroinitializer, [[TMP0]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <4 x i16> [[SUB_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP0]])
+// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[ELT_SAT_I]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT3_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_pneg_i16x4(int16x4_t a) {
- return __riscv_pneg_i16x4(a);
+int16x4_t test_pssh1sadd_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pssh1sadd_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pneg_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_pssh1sadd_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0]]
-// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
-// RV32-NEXT: ret i64 [[TMP1]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP0]])
+// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[ELT_SAT_I]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT3_I]] to i64
+// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pneg_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_pssh1sadd_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SUB_I:%.*]] = sub <2 x i32> zeroinitializer, [[TMP0]]
-// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SUB_I]] to i64
-// RV64-NEXT: ret i64 [[TMP1]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
+// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP0]])
+// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[ELT_SAT_I]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT3_I]] to i64
+// RV64-NEXT: ret i64 [[TMP2]]
//
-int32x2_t test_pneg_i32x2(int32x2_t a) {
- return __riscv_pneg_i32x2(a);
+int32x2_t test_pssh1sadd_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pssh1sadd_i32x2(a, b);
}
-/* Packed Saturating Addition and Subtraction (32-bit) */
+/* Packed Minimum and Maximum (32-bit) */
-// RV32-LABEL: define dso_local i32 @test_psadd_i8x4(
+// RV32-LABEL: define dso_local i32 @test_pmin_i8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psadd_i8x4(
+// RV64-LABEL: define dso_local i32 @test_pmin_i8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int8x4_t test_psadd_i8x4(int8x4_t a, int8x4_t b) {
- return __riscv_psadd_i8x4(a, b);
+int8x4_t test_pmin_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pmin_i8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psadd_i16x2(
+// RV32-LABEL: define dso_local i32 @test_pmin_i16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psadd_i16x2(
+// RV64-LABEL: define dso_local i32 @test_pmin_i16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int16x2_t test_psadd_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_psadd_i16x2(a, b);
+int16x2_t test_pmin_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pmin_i16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psaddu_u8x4(
+// RV32-LABEL: define dso_local i32 @test_pminu_u8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psaddu_u8x4(
+// RV64-LABEL: define dso_local i32 @test_pminu_u8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint8x4_t test_psaddu_u8x4(uint8x4_t a, uint8x4_t b) {
- return __riscv_psaddu_u8x4(a, b);
+uint8x4_t test_pminu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pminu_u8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_psaddu_u16x2(
+// RV32-LABEL: define dso_local i32 @test_pminu_u16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_psaddu_u16x2(
+// RV64-LABEL: define dso_local i32 @test_pminu_u16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint16x2_t test_psaddu_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_psaddu_u16x2(a, b);
+uint16x2_t test_pminu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pminu_u16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_pssub_i8x4(
+// RV32-LABEL: define dso_local i32 @test_pmax_i8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_pssub_i8x4(
+// RV64-LABEL: define dso_local i32 @test_pmax_i8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int8x4_t test_pssub_i8x4(int8x4_t a, int8x4_t b) {
- return __riscv_pssub_i8x4(a, b);
+int8x4_t test_pmax_i8x4(int8x4_t a, int8x4_t b) {
+ return __riscv_pmax_i8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_pssub_i16x2(
+// RV32-LABEL: define dso_local i32 @test_pmax_i16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_pssub_i16x2(
+// RV64-LABEL: define dso_local i32 @test_pmax_i16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-int16x2_t test_pssub_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_pssub_i16x2(a, b);
+int16x2_t test_pmax_i16x2(int16x2_t a, int16x2_t b) {
+ return __riscv_pmax_i16x2(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_pssubu_u8x4(
+// RV32-LABEL: define dso_local i32 @test_pmaxu_u8x4(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_pssubu_u8x4(
+// RV64-LABEL: define dso_local i32 @test_pmaxu_u8x4(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_SAT_I]] to i32
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint8x4_t test_pssubu_u8x4(uint8x4_t a, uint8x4_t b) {
- return __riscv_pssubu_u8x4(a, b);
+uint8x4_t test_pmaxu_u8x4(uint8x4_t a, uint8x4_t b) {
+ return __riscv_pmaxu_u8x4(a, b);
}
-// RV32-LABEL: define dso_local i32 @test_pssubu_u16x2(
+// RV32-LABEL: define dso_local i32 @test_pmaxu_u16x2(
// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
// RV32-NEXT: ret i32 [[TMP2]]
//
-// RV64-LABEL: define dso_local i32 @test_pssubu_u16x2(
+// RV64-LABEL: define dso_local i32 @test_pmaxu_u16x2(
// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT_I]] to i32
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
// RV64-NEXT: ret i32 [[TMP2]]
//
-uint16x2_t test_pssubu_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_pssubu_u16x2(a, b);
+uint16x2_t test_pmaxu_u16x2(uint16x2_t a, uint16x2_t b) {
+ return __riscv_pmaxu_u16x2(a, b);
}
-/* Packed Saturating Addition and Subtraction (64-bit) */
+/* Packed Minimum and Maximum (64-bit) */
-// RV32-LABEL: define dso_local i64 @test_psadd_i8x8(
+// RV32-LABEL: define dso_local i64 @test_pmin_i8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.smin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psadd_i8x8(
+// RV64-LABEL: define dso_local i64 @test_pmin_i8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.smin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_psadd_i8x8(int8x8_t a, int8x8_t b) {
- return __riscv_psadd_i8x8(a, b);
+int8x8_t test_pmin_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pmin_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psadd_i16x4(
+// RV32-LABEL: define dso_local i64 @test_pmin_i16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psadd_i16x4(
+// RV64-LABEL: define dso_local i64 @test_pmin_i16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_psadd_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_psadd_i16x4(a, b);
+int16x4_t test_pmin_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pmin_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psadd_i32x2(
+// RV32-LABEL: define dso_local i64 @test_pmin_i32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psadd_i32x2(
+// RV64-LABEL: define dso_local i64 @test_pmin_i32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int32x2_t test_psadd_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_psadd_i32x2(a, b);
+int32x2_t test_pmin_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pmin_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psaddu_u8x8(
+// RV32-LABEL: define dso_local i64 @test_pminu_u8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psaddu_u8x8(
+// RV64-LABEL: define dso_local i64 @test_pminu_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint8x8_t test_psaddu_u8x8(uint8x8_t a, uint8x8_t b) {
- return __riscv_psaddu_u8x8(a, b);
+uint8x8_t test_pminu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pminu_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psaddu_u16x4(
+// RV32-LABEL: define dso_local i64 @test_pminu_u16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psaddu_u16x4(
+// RV64-LABEL: define dso_local i64 @test_pminu_u16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint16x4_t test_psaddu_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_psaddu_u16x4(a, b);
+uint16x4_t test_pminu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pminu_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_psaddu_u32x2(
+// RV32-LABEL: define dso_local i64 @test_pminu_u32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_psaddu_u32x2(
+// RV64-LABEL: define dso_local i64 @test_pminu_u32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.uadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint32x2_t test_psaddu_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_psaddu_u32x2(a, b);
+uint32x2_t test_pminu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pminu_u32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pssub_i8x8(
+// RV32-LABEL: define dso_local i64 @test_pmax_i8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pssub_i8x8(
+// RV64-LABEL: define dso_local i64 @test_pmax_i8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int8x8_t test_pssub_i8x8(int8x8_t a, int8x8_t b) {
- return __riscv_pssub_i8x8(a, b);
+int8x8_t test_pmax_i8x8(int8x8_t a, int8x8_t b) {
+ return __riscv_pmax_i8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pssub_i16x4(
+// RV32-LABEL: define dso_local i64 @test_pmax_i16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pssub_i16x4(
+// RV64-LABEL: define dso_local i64 @test_pmax_i16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int16x4_t test_pssub_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_pssub_i16x4(a, b);
+int16x4_t test_pmax_i16x4(int16x4_t a, int16x4_t b) {
+ return __riscv_pmax_i16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pssub_i32x2(
+// RV32-LABEL: define dso_local i64 @test_pmax_i32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pssub_i32x2(
+// RV64-LABEL: define dso_local i64 @test_pmax_i32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.ssub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-int32x2_t test_pssub_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_pssub_i32x2(a, b);
+int32x2_t test_pmax_i32x2(int32x2_t a, int32x2_t b) {
+ return __riscv_pmax_i32x2(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pssubu_u8x8(
+// RV32-LABEL: define dso_local i64 @test_pmaxu_u8x8(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pssubu_u8x8(
+// RV64-LABEL: define dso_local i64 @test_pmaxu_u8x8(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint8x8_t test_pssubu_u8x8(uint8x8_t a, uint8x8_t b) {
- return __riscv_pssubu_u8x8(a, b);
+uint8x8_t test_pmaxu_u8x8(uint8x8_t a, uint8x8_t b) {
+ return __riscv_pmaxu_u8x8(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pssubu_u16x4(
+// RV32-LABEL: define dso_local i64 @test_pmaxu_u16x4(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.umax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pssubu_u16x4(
+// RV64-LABEL: define dso_local i64 @test_pmaxu_u16x4(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.umax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint16x4_t test_pssubu_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_pssubu_u16x4(a, b);
+uint16x4_t test_pmaxu_u16x4(uint16x4_t a, uint16x4_t b) {
+ return __riscv_pmaxu_u16x4(a, b);
}
-// RV32-LABEL: define dso_local i64 @test_pssubu_u32x2(
+// RV32-LABEL: define dso_local i64 @test_pmaxu_u32x2(
// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
// RV32-NEXT: ret i64 [[TMP2]]
//
-// RV64-LABEL: define dso_local i64 @test_pssubu_u32x2(
+// RV64-LABEL: define dso_local i64 @test_pmaxu_u32x2(
// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.usub.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT_I]] to i64
+// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
+// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
// RV64-NEXT: ret i64 [[TMP2]]
//
-uint32x2_t test_pssubu_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_pssubu_u32x2(a, b);
+uint32x2_t test_pmaxu_u32x2(uint32x2_t a, uint32x2_t b) {
+ return __riscv_pmaxu_u32x2(a, b);
}
-/* Packed Shift-Add (32-bit) */
+/* Packed Shifts (32-bit) */
-// RV32-LABEL: define dso_local i32 @test_psh1add_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psll_s_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i32 @test_psh1add_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psll_s_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
//
-int16x2_t test_psh1add_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_psh1add_i16x2(a, b);
+int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned shamt) {
+ return __riscv_psll_s_i8x4(a, shamt);
}
-// RV32-LABEL: define dso_local i32 @test_psh1add_u16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psll_s_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i32 @test_psh1add_u16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psll_s_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], splat (i16 1)
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i16> [[SHL_I]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ADD_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHL_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
//
-uint16x2_t test_psh1add_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_psh1add_u16x2(a, b);
+uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned shamt) {
+ return __riscv_psll_s_u8x4(a, shamt);
}
-// RV32-LABEL: define dso_local i32 @test_pssh1sadd_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psll_s_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP0]])
-// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ELT_SAT_I]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT3_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i32 @test_pssh1sadd_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psll_s_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP0]])
-// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> [[ELT_SAT_I]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_SAT3_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
//
-int16x2_t test_pssh1sadd_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_pssh1sadd_i16x2(a, b);
+int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned shamt) {
+ return __riscv_psll_s_i16x2(a, shamt);
}
-/* Packed Shift-Add (64-bit) */
-
-// RV32-LABEL: define dso_local i64 @test_psh1add_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psll_s_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psh1add_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psll_s_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHL_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
//
-int16x4_t test_psh1add_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_psh1add_i16x4(a, b);
+uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned shamt) {
+ return __riscv_psll_s_u16x2(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_psh1add_u16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psra_s_i8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
-// RV32-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_psh1add_u16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psra_s_i8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], splat (i16 1)
-// RV64-NEXT: [[ADD_I:%.*]] = add <4 x i16> [[SHL_I]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ADD_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
//
-uint16x4_t test_psh1add_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_psh1add_u16x4(a, b);
+int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned shamt) {
+ return __riscv_psra_s_i8x4(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_psh1add_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i64 @test_psh1add_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
-//
-int32x2_t test_psh1add_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_psh1add_i32x2(a, b);
-}
-
-// RV32-LABEL: define dso_local i64 @test_psh1add_u32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
-// RV32-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i64 @test_psh1add_u32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], splat (i32 1)
-// RV64-NEXT: [[ADD_I:%.*]] = add <2 x i32> [[SHL_I]], [[TMP1]]
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ADD_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
-//
-uint32x2_t test_psh1add_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_psh1add_u32x2(a, b);
-}
-
-// RV32-LABEL: define dso_local i64 @test_pssh1sadd_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP0]])
-// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[ELT_SAT_I]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT3_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i64 @test_pssh1sadd_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP0]])
-// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> [[ELT_SAT_I]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_SAT3_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
-//
-int16x4_t test_pssh1sadd_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_pssh1sadd_i16x4(a, b);
-}
-
-// RV32-LABEL: define dso_local i64 @test_pssh1sadd_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP0]])
-// RV32-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[ELT_SAT_I]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT3_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i64 @test_pssh1sadd_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_SAT_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP0]])
-// RV64-NEXT: [[ELT_SAT3_I:%.*]] = call <2 x i32> @llvm.sadd.sat.v2i32(<2 x i32> [[ELT_SAT_I]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_SAT3_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
-//
-int32x2_t test_pssh1sadd_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_pssh1sadd_i32x2(a, b);
-}
-
-/* Packed Minimum and Maximum (32-bit) */
-
-// RV32-LABEL: define dso_local i32 @test_pmin_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psrl_s_u8x4(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i32 @test_pmin_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.smin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
-//
-int8x4_t test_pmin_i8x4(int8x4_t a, int8x4_t b) {
- return __riscv_pmin_i8x4(a, b);
-}
-
-// RV32-LABEL: define dso_local i32 @test_pmin_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i32 @test_pmin_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.smin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
-//
-int16x2_t test_pmin_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_pmin_i16x2(a, b);
-}
-
-// RV32-LABEL: define dso_local i32 @test_pminu_u8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i32 @test_pminu_u8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psrl_s_u8x4(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i8> @llvm.umin.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MIN_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i8> [[TMP3]], <4 x i8> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i8> [[SHR_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
//
-uint8x4_t test_pminu_u8x4(uint8x4_t a, uint8x4_t b) {
- return __riscv_pminu_u8x4(a, b);
+uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned shamt) {
+ return __riscv_psrl_s_u8x4(a, shamt);
}
-// RV32-LABEL: define dso_local i32 @test_pminu_u16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psra_s_i16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i32 @test_pminu_u16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psra_s_i16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i16> @llvm.umin.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MIN_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
-//
-uint16x2_t test_pminu_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_pminu_u16x2(a, b);
-}
-
-// RV32-LABEL: define dso_local i32 @test_pmax_i8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i32 @test_pmax_i8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.smax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
//
-int8x4_t test_pmax_i8x4(int8x4_t a, int8x4_t b) {
- return __riscv_pmax_i8x4(a, b);
+int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned shamt) {
+ return __riscv_psra_s_i16x2(a, shamt);
}
-// RV32-LABEL: define dso_local i32 @test_pmax_i16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i32 @test_psrl_s_u16x2(
+// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV32-NEXT: ret i32 [[TMP4]]
//
-// RV64-LABEL: define dso_local i32 @test_pmax_i16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i32 @test_psrl_s_u16x2(
+// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.smax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <2 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <2 x i16> [[TMP3]], <2 x i16> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <2 x i16> [[SHR_I]] to i32
+// RV64-NEXT: ret i32 [[TMP4]]
//
-int16x2_t test_pmax_i16x2(int16x2_t a, int16x2_t b) {
- return __riscv_pmax_i16x2(a, b);
+uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned shamt) {
+ return __riscv_psrl_s_u16x2(a, shamt);
}
-// RV32-LABEL: define dso_local i32 @test_pmaxu_u8x4(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i32 @test_pmaxu_u8x4(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <4 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <4 x i8>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i8> @llvm.umax.v4i8(<4 x i8> [[TMP0]], <4 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i8> [[ELT_MAX_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
-//
-uint8x4_t test_pmaxu_u8x4(uint8x4_t a, uint8x4_t b) {
- return __riscv_pmaxu_u8x4(a, b);
-}
+/* Packed Shifts (64-bit) */
-// RV32-LABEL: define dso_local i32 @test_pmaxu_u16x2(
-// RV32-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psll_s_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
-// RV32-NEXT: ret i32 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
-// RV64-LABEL: define dso_local i32 @test_pmaxu_u16x2(
-// RV64-SAME: i32 noundef [[A_COERCE:%.*]], i32 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psll_s_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i32 [[A_COERCE]] to <2 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i32 [[B_COERCE]] to <2 x i16>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i16> @llvm.umax.v2i16(<2 x i16> [[TMP0]], <2 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i16> [[ELT_MAX_I]] to i32
-// RV64-NEXT: ret i32 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
-uint16x2_t test_pmaxu_u16x2(uint16x2_t a, uint16x2_t b) {
- return __riscv_pmaxu_u16x2(a, b);
+int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned shamt) {
+ return __riscv_psll_s_i8x8(a, shamt);
}
-/* Packed Minimum and Maximum (64-bit) */
-
-// RV32-LABEL: define dso_local i64 @test_pmin_i8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psll_s_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.smin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_pmin_i8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psll_s_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.smin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
-int8x8_t test_pmin_i8x8(int8x8_t a, int8x8_t b) {
- return __riscv_pmin_i8x8(a, b);
+uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned shamt) {
+ return __riscv_psll_s_u8x8(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pmin_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psll_s_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_pmin_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psll_s_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.smin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
-//
-int16x4_t test_pmin_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_pmin_i16x4(a, b);
-}
-
-// RV32-LABEL: define dso_local i64 @test_pmin_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
-//
-// RV64-LABEL: define dso_local i64 @test_pmin_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
-// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.smin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
-int32x2_t test_pmin_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_pmin_i32x2(a, b);
+int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned shamt) {
+ return __riscv_psll_s_i16x4(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pminu_u8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psll_s_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_pminu_u8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psll_s_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <8 x i8> @llvm.umin.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MIN_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
-uint8x8_t test_pminu_u8x8(uint8x8_t a, uint8x8_t b) {
- return __riscv_pminu_u8x8(a, b);
+uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned shamt) {
+ return __riscv_psll_s_u16x4(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pminu_u16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psll_s_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_pminu_u16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psll_s_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <4 x i16> @llvm.umin.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MIN_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-uint16x4_t test_pminu_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_pminu_u16x4(a, b);
+int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned shamt) {
+ return __riscv_psll_s_i32x2(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pminu_u32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psll_s_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_pminu_u32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psll_s_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_MIN_I:%.*]] = call <2 x i32> @llvm.umin.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MIN_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHL_I:%.*]] = shl <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHL_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-uint32x2_t test_pminu_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_pminu_u32x2(a, b);
+uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned shamt) {
+ return __riscv_psll_s_u32x2(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pmax_i8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psra_s_i8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_pmax_i8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psra_s_i8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.smax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
-int8x8_t test_pmax_i8x8(int8x8_t a, int8x8_t b) {
- return __riscv_pmax_i8x8(a, b);
+int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned shamt) {
+ return __riscv_psra_s_i8x8(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pmax_i16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u8x8(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV32-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_pmax_i16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u8x8(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.smax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i8
+// RV64-NEXT: [[TMP2:%.*]] = and i8 [[TMP1]], 7
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <8 x i8> poison, i8 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <8 x i8> [[TMP3]], <8 x i8> poison, <8 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <8 x i8> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <8 x i8> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
-int16x4_t test_pmax_i16x4(int16x4_t a, int16x4_t b) {
- return __riscv_pmax_i16x4(a, b);
+uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned shamt) {
+ return __riscv_psrl_s_u8x8(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pmax_i32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psra_s_i16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_pmax_i32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psra_s_i16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.smax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
-int32x2_t test_pmax_i32x2(int32x2_t a, int32x2_t b) {
- return __riscv_pmax_i32x2(a, b);
+int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned shamt) {
+ return __riscv_psra_s_i16x4(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pmaxu_u8x8(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u16x4(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV32-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV32-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV32-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV32-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV32-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP4]]
//
-// RV64-LABEL: define dso_local i64 @test_pmaxu_u8x8(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u16x4(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <8 x i8>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <8 x i8>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <8 x i8> @llvm.umax.v8i8(<8 x i8> [[TMP0]], <8 x i8> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <8 x i8> [[ELT_MAX_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
+// RV64-NEXT: [[TMP1:%.*]] = trunc i32 [[SHAMT]] to i16
+// RV64-NEXT: [[TMP2:%.*]] = and i16 [[TMP1]], 15
+// RV64-NEXT: [[TMP3:%.*]] = insertelement <4 x i16> poison, i16 [[TMP2]], i64 0
+// RV64-NEXT: [[SH_PROM_I:%.*]] = shufflevector <4 x i16> [[TMP3]], <4 x i16> poison, <4 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <4 x i16> [[TMP0]], [[SH_PROM_I]]
+// RV64-NEXT: [[TMP4:%.*]] = bitcast <4 x i16> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP4]]
//
-uint8x8_t test_pmaxu_u8x8(uint8x8_t a, uint8x8_t b) {
- return __riscv_pmaxu_u8x8(a, b);
+uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned shamt) {
+ return __riscv_psrl_s_u16x4(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pmaxu_u16x4(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psra_s_i32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
-// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.umax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_pmaxu_u16x4(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psra_s_i32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
-// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <4 x i16>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <4 x i16>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <4 x i16> @llvm.umax.v4i16(<4 x i16> [[TMP0]], <4 x i16> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <4 x i16> [[ELT_MAX_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = ashr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-uint16x4_t test_pmaxu_u16x4(uint16x4_t a, uint16x4_t b) {
- return __riscv_pmaxu_u16x4(a, b);
+int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned shamt) {
+ return __riscv_psra_s_i32x2(a, shamt);
}
-// RV32-LABEL: define dso_local i64 @test_pmaxu_u32x2(
-// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV32-LABEL: define dso_local i64 @test_psrl_s_u32x2(
+// RV32-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef [[SHAMT:%.*]]) #[[ATTR0]] {
// RV32-NEXT: [[ENTRY:.*:]]
// RV32-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV32-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV32-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV32-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
-// RV32-NEXT: ret i64 [[TMP2]]
+// RV32-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV32-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV32-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV32-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV32-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV32-NEXT: ret i64 [[TMP1]]
//
-// RV64-LABEL: define dso_local i64 @test_pmaxu_u32x2(
-// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i64 noundef [[B_COERCE:%.*]]) #[[ATTR0]] {
+// RV64-LABEL: define dso_local i64 @test_psrl_s_u32x2(
+// RV64-SAME: i64 noundef [[A_COERCE:%.*]], i32 noundef signext [[SHAMT:%.*]]) #[[ATTR0]] {
// RV64-NEXT: [[ENTRY:.*:]]
// RV64-NEXT: [[TMP0:%.*]] = bitcast i64 [[A_COERCE]] to <2 x i32>
-// RV64-NEXT: [[TMP1:%.*]] = bitcast i64 [[B_COERCE]] to <2 x i32>
-// RV64-NEXT: [[ELT_MAX_I:%.*]] = call <2 x i32> @llvm.umax.v2i32(<2 x i32> [[TMP0]], <2 x i32> [[TMP1]])
-// RV64-NEXT: [[TMP2:%.*]] = bitcast <2 x i32> [[ELT_MAX_I]] to i64
-// RV64-NEXT: ret i64 [[TMP2]]
+// RV64-NEXT: [[AND_I:%.*]] = and i32 [[SHAMT]], 31
+// RV64-NEXT: [[SPLAT_SPLATINSERT_I:%.*]] = insertelement <2 x i32> poison, i32 [[AND_I]], i64 0
+// RV64-NEXT: [[SPLAT_SPLAT_I:%.*]] = shufflevector <2 x i32> [[SPLAT_SPLATINSERT_I]], <2 x i32> poison, <2 x i32> zeroinitializer
+// RV64-NEXT: [[SHR_I:%.*]] = lshr <2 x i32> [[TMP0]], [[SPLAT_SPLAT_I]]
+// RV64-NEXT: [[TMP1:%.*]] = bitcast <2 x i32> [[SHR_I]] to i64
+// RV64-NEXT: ret i64 [[TMP1]]
//
-uint32x2_t test_pmaxu_u32x2(uint32x2_t a, uint32x2_t b) {
- return __riscv_pmaxu_u32x2(a, b);
+uint32x2_t test_psrl_s_u32x2(uint32x2_t a, unsigned shamt) {
+ return __riscv_psrl_s_u32x2(a, shamt);
}
/* Packed Logical Operations (32-bit) */
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
index 6a01dcfa35219..5e9afc3cd5f99 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
@@ -152,328 +152,6 @@ int32x2_t test_pmv_s_i32x2_imm_big(void) {
return __riscv_pmv_s_i32x2(0x12345);
}
-// CHECK-LABEL: test_psll_s_u8x4:
-// CHECK: psll.bs
-uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned n) {
- return __riscv_psll_s_u8x4(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_i8x4:
-// CHECK: psll.bs
-int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned n) {
- return __riscv_psll_s_i8x4(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_u16x2:
-// CHECK: psll.hs
-uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned n) {
- return __riscv_psll_s_u16x2(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_i16x2:
-// CHECK: psll.hs
-int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned n) {
- return __riscv_psll_s_i16x2(a, n);
-}
-
-// CHECK-LABEL: test_psrl_s_u8x4:
-// CHECK: psrl.bs
-uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned n) {
- return __riscv_psrl_s_u8x4(a, n);
-}
-
-// CHECK-LABEL: test_psrl_s_u16x2:
-// CHECK: psrl.hs
-uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned n) {
- return __riscv_psrl_s_u16x2(a, n);
-}
-
-// CHECK-LABEL: test_psra_s_i8x4:
-// CHECK: psra.bs
-int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned n) {
- return __riscv_psra_s_i8x4(a, n);
-}
-
-// CHECK-LABEL: test_psra_s_i16x2:
-// CHECK: psra.hs
-int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned n) {
- return __riscv_psra_s_i16x2(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_u8x4_imm:
-// CHECK: pslli.b{{[[:space:]]+}}{{.*}}, 2
-uint8x4_t test_psll_s_u8x4_imm(uint8x4_t a) {
- return __riscv_psll_s_u8x4(a, 2);
-}
-
-// CHECK-LABEL: test_psll_s_i8x4_imm:
-// CHECK: pslli.b{{[[:space:]]+}}{{.*}}, 3
-int8x4_t test_psll_s_i8x4_imm(int8x4_t a) { return __riscv_psll_s_i8x4(a, 3); }
-
-// CHECK-LABEL: test_psll_s_u16x2_imm:
-// CHECK: pslli.h{{[[:space:]]+}}{{.*}}, 5
-uint16x2_t test_psll_s_u16x2_imm(uint16x2_t a) {
- return __riscv_psll_s_u16x2(a, 5);
-}
-
-// CHECK-LABEL: test_psll_s_i16x2_imm:
-// CHECK: pslli.h{{[[:space:]]+}}{{.*}}, 7
-int16x2_t test_psll_s_i16x2_imm(int16x2_t a) {
- return __riscv_psll_s_i16x2(a, 7);
-}
-
-// CHECK-LABEL: test_psrl_s_u8x4_imm:
-// CHECK: psrli.b{{[[:space:]]+}}{{.*}}, 2
-uint8x4_t test_psrl_s_u8x4_imm(uint8x4_t a) {
- return __riscv_psrl_s_u8x4(a, 2);
-}
-
-// CHECK-LABEL: test_psrl_s_u16x2_imm:
-// CHECK: psrli.h{{[[:space:]]+}}{{.*}}, 3
-uint16x2_t test_psrl_s_u16x2_imm(uint16x2_t a) {
- return __riscv_psrl_s_u16x2(a, 3);
-}
-
-// CHECK-LABEL: test_psra_s_i8x4_imm:
-// CHECK: psrai.b{{[[:space:]]+}}{{.*}}, 4
-int8x4_t test_psra_s_i8x4_imm(int8x4_t a) { return __riscv_psra_s_i8x4(a, 4); }
-
-// CHECK-LABEL: test_psra_s_i16x2_imm:
-// CHECK: psrai.h{{[[:space:]]+}}{{.*}}, 5
-int16x2_t test_psra_s_i16x2_imm(int16x2_t a) {
- return __riscv_psra_s_i16x2(a, 5);
-}
-
-// CHECK-LABEL: test_psll_s_u8x8:
-// RV32: psll.dbs
-// RV64: psll.bs
-uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned n) {
- return __riscv_psll_s_u8x8(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_i8x8:
-// RV32: psll.dbs
-// RV64: psll.bs
-int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned n) {
- return __riscv_psll_s_i8x8(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_u16x4:
-// RV32: psll.dhs
-// RV64: psll.hs
-uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned n) {
- return __riscv_psll_s_u16x4(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_i16x4:
-// RV32: psll.dhs
-// RV64: psll.hs
-int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned n) {
- return __riscv_psll_s_i16x4(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_u32x2:
-// RV32: psll.dws
-// RV64: psll.ws
-uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned n) {
- return __riscv_psll_s_u32x2(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_i32x2:
-// RV32: psll.dws
-// RV64: psll.ws
-int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned n) {
- return __riscv_psll_s_i32x2(a, n);
-}
-
-// CHECK-LABEL: test_psrl_s_u8x8:
-// RV32: psrl.dbs
-// RV64: psrl.bs
-uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned n) {
- return __riscv_psrl_s_u8x8(a, n);
-}
-
-// CHECK-LABEL: test_psrl_s_u16x4:
-// RV32: psrl.dhs
-// RV64: psrl.hs
-uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned n) {
- return __riscv_psrl_s_u16x4(a, n);
-}
-
-// CHECK-LABEL: test_psrl_s_u32x2:
-// RV32: psrl.dws
-// RV64: psrl.ws
-uint32x2_t test_psrl_s_u32x2(uint32x2_t a, unsigned n) {
- return __riscv_psrl_s_u32x2(a, n);
-}
-
-// CHECK-LABEL: test_psra_s_i8x8:
-// RV32: psra.dbs
-// RV64: psra.bs
-int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned n) {
- return __riscv_psra_s_i8x8(a, n);
-}
-
-// CHECK-LABEL: test_psra_s_i16x4:
-// RV32: psra.dhs
-// RV64: psra.hs
-int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned n) {
- return __riscv_psra_s_i16x4(a, n);
-}
-
-// CHECK-LABEL: test_psra_s_i32x2:
-// RV32: psra.dws
-// RV64: psra.ws
-int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned n) {
- return __riscv_psra_s_i32x2(a, n);
-}
-
-// CHECK-LABEL: test_psll_s_u8x8_imm:
-// RV32: pslli.db{{[[:space:]]+}}{{.*}}, 2
-// RV64: pslli.b{{[[:space:]]+}}{{.*}}, 2
-uint8x8_t test_psll_s_u8x8_imm(uint8x8_t a) {
- return __riscv_psll_s_u8x8(a, 2);
-}
-
-// CHECK-LABEL: test_psll_s_i8x8_imm:
-// RV32: pslli.db{{[[:space:]]+}}{{.*}}, 3
-// RV64: pslli.b{{[[:space:]]+}}{{.*}}, 3
-int8x8_t test_psll_s_i8x8_imm(int8x8_t a) { return __riscv_psll_s_i8x8(a, 3); }
-
-// CHECK-LABEL: test_psll_s_u16x4_imm:
-// RV32: pslli.dh{{[[:space:]]+}}{{.*}}, 4
-// RV64: pslli.h{{[[:space:]]+}}{{.*}}, 4
-uint16x4_t test_psll_s_u16x4_imm(uint16x4_t a) {
- return __riscv_psll_s_u16x4(a, 4);
-}
-
-// CHECK-LABEL: test_psll_s_i16x4_imm:
-// RV32: pslli.dh{{[[:space:]]+}}{{.*}}, 5
-// RV64: pslli.h{{[[:space:]]+}}{{.*}}, 5
-int16x4_t test_psll_s_i16x4_imm(int16x4_t a) {
- return __riscv_psll_s_i16x4(a, 5);
-}
-
-// CHECK-LABEL: test_psll_s_u32x2_imm:
-// RV32: pslli.dw{{[[:space:]]+}}{{.*}}, 7
-// RV64: pslli.w{{[[:space:]]+}}{{.*}}, 7
-uint32x2_t test_psll_s_u32x2_imm(uint32x2_t a) {
- return __riscv_psll_s_u32x2(a, 7);
-}
-
-// CHECK-LABEL: test_psll_s_i32x2_imm:
-// RV32: pslli.dw{{[[:space:]]+}}{{.*}}, 9
-// RV64: pslli.w{{[[:space:]]+}}{{.*}}, 9
-int32x2_t test_psll_s_i32x2_imm(int32x2_t a) {
- return __riscv_psll_s_i32x2(a, 9);
-}
-
-// CHECK-LABEL: test_psrl_s_u8x8_imm:
-// RV32: psrli.db{{[[:space:]]+}}{{.*}}, 2
-// RV64: psrli.b{{[[:space:]]+}}{{.*}}, 2
-uint8x8_t test_psrl_s_u8x8_imm(uint8x8_t a) {
- return __riscv_psrl_s_u8x8(a, 2);
-}
-
-// CHECK-LABEL: test_psrl_s_u16x4_imm:
-// RV32: psrli.dh{{[[:space:]]+}}{{.*}}, 3
-// RV64: psrli.h{{[[:space:]]+}}{{.*}}, 3
-uint16x4_t test_psrl_s_u16x4_imm(uint16x4_t a) {
- return __riscv_psrl_s_u16x4(a, 3);
-}
-
-// CHECK-LABEL: test_psrl_s_u32x2_imm:
-// RV32: psrli.dw{{[[:space:]]+}}{{.*}}, 5
-// RV64: psrli.w{{[[:space:]]+}}{{.*}}, 5
-uint32x2_t test_psrl_s_u32x2_imm(uint32x2_t a) {
- return __riscv_psrl_s_u32x2(a, 5);
-}
-
-// CHECK-LABEL: test_psra_s_i8x8_imm:
-// RV32: psrai.db{{[[:space:]]+}}{{.*}}, 4
-// RV64: psrai.b{{[[:space:]]+}}{{.*}}, 4
-int8x8_t test_psra_s_i8x8_imm(int8x8_t a) { return __riscv_psra_s_i8x8(a, 4); }
-
-// CHECK-LABEL: test_psra_s_i16x4_imm:
-// RV32: psrai.dh{{[[:space:]]+}}{{.*}}, 5
-// RV64: psrai.h{{[[:space:]]+}}{{.*}}, 5
-int16x4_t test_psra_s_i16x4_imm(int16x4_t a) {
- return __riscv_psra_s_i16x4(a, 5);
-}
-
-// CHECK-LABEL: test_psra_s_i32x2_imm:
-// RV32: psrai.dw{{[[:space:]]+}}{{.*}}, 11
-// RV64: psrai.w{{[[:space:]]+}}{{.*}}, 11
-int32x2_t test_psra_s_i32x2_imm(int32x2_t a) {
- return __riscv_psra_s_i32x2(a, 11);
-}
-
-// CHECK-LABEL: test_padd_s_u8x4:
-// CHECK: padd.bs
-uint8x4_t test_padd_s_u8x4(uint8x4_t a, uint8_t b) {
- return __riscv_padd_s_u8x4(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_i8x4:
-// CHECK: padd.bs
-int8x4_t test_padd_s_i8x4(int8x4_t a, int8_t b) {
- return __riscv_padd_s_i8x4(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_u16x2:
-// CHECK: padd.hs
-uint16x2_t test_padd_s_u16x2(uint16x2_t a, uint16_t b) {
- return __riscv_padd_s_u16x2(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_i16x2:
-// CHECK: padd.hs
-int16x2_t test_padd_s_i16x2(int16x2_t a, int16_t b) {
- return __riscv_padd_s_i16x2(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_u8x8:
-// RV32: padd.dbs
-// RV64: padd.bs
-uint8x8_t test_padd_s_u8x8(uint8x8_t a, uint8_t b) {
- return __riscv_padd_s_u8x8(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_i8x8:
-// RV32: padd.dbs
-// RV64: padd.bs
-int8x8_t test_padd_s_i8x8(int8x8_t a, int8_t b) {
- return __riscv_padd_s_i8x8(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_u16x4:
-// RV32: padd.dhs
-// RV64: padd.hs
-uint16x4_t test_padd_s_u16x4(uint16x4_t a, uint16_t b) {
- return __riscv_padd_s_u16x4(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_i16x4:
-// RV32: padd.dhs
-// RV64: padd.hs
-int16x4_t test_padd_s_i16x4(int16x4_t a, int16_t b) {
- return __riscv_padd_s_i16x4(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_u32x2:
-// RV32: padd.dws
-// RV64: padd.ws
-uint32x2_t test_padd_s_u32x2(uint32x2_t a, uint32_t b) {
- return __riscv_padd_s_u32x2(a, b);
-}
-
-// CHECK-LABEL: test_padd_s_i32x2:
-// RV32: padd.dws
-// RV64: padd.ws
-int32x2_t test_padd_s_i32x2(int32x2_t a, int32_t b) {
- return __riscv_padd_s_i32x2(a, b);
-}
-
// CHECK-LABEL: test_padd_i8x4:
// CHECK: padd.b
int8x4_t test_padd_i8x4(int8x4_t a, int8x4_t b) {
@@ -629,6 +307,72 @@ int16x4_t test_pneg_i16x4(int16x4_t a) { return __riscv_pneg_i16x4(a); }
// RV64: pneg.w
int32x2_t test_pneg_i32x2(int32x2_t a) { return __riscv_pneg_i32x2(a); }
+// CHECK-LABEL: test_padd_s_u8x4:
+// CHECK: padd.bs
+uint8x4_t test_padd_s_u8x4(uint8x4_t a, uint8_t b) {
+ return __riscv_padd_s_u8x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i8x4:
+// CHECK: padd.bs
+int8x4_t test_padd_s_i8x4(int8x4_t a, int8_t b) {
+ return __riscv_padd_s_i8x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_u16x2:
+// CHECK: padd.hs
+uint16x2_t test_padd_s_u16x2(uint16x2_t a, uint16_t b) {
+ return __riscv_padd_s_u16x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i16x2:
+// CHECK: padd.hs
+int16x2_t test_padd_s_i16x2(int16x2_t a, int16_t b) {
+ return __riscv_padd_s_i16x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_u8x8:
+// RV32: padd.dbs
+// RV64: padd.bs
+uint8x8_t test_padd_s_u8x8(uint8x8_t a, uint8_t b) {
+ return __riscv_padd_s_u8x8(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i8x8:
+// RV32: padd.dbs
+// RV64: padd.bs
+int8x8_t test_padd_s_i8x8(int8x8_t a, int8_t b) {
+ return __riscv_padd_s_i8x8(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_u16x4:
+// RV32: padd.dhs
+// RV64: padd.hs
+uint16x4_t test_padd_s_u16x4(uint16x4_t a, uint16_t b) {
+ return __riscv_padd_s_u16x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i16x4:
+// RV32: padd.dhs
+// RV64: padd.hs
+int16x4_t test_padd_s_i16x4(int16x4_t a, int16_t b) {
+ return __riscv_padd_s_i16x4(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_u32x2:
+// RV32: padd.dws
+// RV64: padd.ws
+uint32x2_t test_padd_s_u32x2(uint32x2_t a, uint32_t b) {
+ return __riscv_padd_s_u32x2(a, b);
+}
+
+// CHECK-LABEL: test_padd_s_i32x2:
+// RV32: padd.dws
+// RV64: padd.ws
+int32x2_t test_padd_s_i32x2(int32x2_t a, int32_t b) {
+ return __riscv_padd_s_i32x2(a, b);
+}
+
// CHECK-LABEL: test_psadd_i8x4:
// CHECK: psadd.b
int8x4_t test_psadd_i8x4(int8x4_t a, int8x4_t b) {
@@ -953,6 +697,262 @@ uint32x2_t test_pmaxu_u32x2(uint32x2_t a, uint32x2_t b) {
return __riscv_pmaxu_u32x2(a, b);
}
+// CHECK-LABEL: test_psll_s_u8x4:
+// CHECK: psll.bs
+uint8x4_t test_psll_s_u8x4(uint8x4_t a, unsigned n) {
+ return __riscv_psll_s_u8x4(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i8x4:
+// CHECK: psll.bs
+int8x4_t test_psll_s_i8x4(int8x4_t a, unsigned n) {
+ return __riscv_psll_s_i8x4(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u16x2:
+// CHECK: psll.hs
+uint16x2_t test_psll_s_u16x2(uint16x2_t a, unsigned n) {
+ return __riscv_psll_s_u16x2(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i16x2:
+// CHECK: psll.hs
+int16x2_t test_psll_s_i16x2(int16x2_t a, unsigned n) {
+ return __riscv_psll_s_i16x2(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u8x4:
+// CHECK: psrl.bs
+uint8x4_t test_psrl_s_u8x4(uint8x4_t a, unsigned n) {
+ return __riscv_psrl_s_u8x4(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u16x2:
+// CHECK: psrl.hs
+uint16x2_t test_psrl_s_u16x2(uint16x2_t a, unsigned n) {
+ return __riscv_psrl_s_u16x2(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i8x4:
+// CHECK: psra.bs
+int8x4_t test_psra_s_i8x4(int8x4_t a, unsigned n) {
+ return __riscv_psra_s_i8x4(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i16x2:
+// CHECK: psra.hs
+int16x2_t test_psra_s_i16x2(int16x2_t a, unsigned n) {
+ return __riscv_psra_s_i16x2(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u8x4_imm:
+// CHECK: pslli.b{{[[:space:]]+}}{{.*}}, 2
+uint8x4_t test_psll_s_u8x4_imm(uint8x4_t a) {
+ return __riscv_psll_s_u8x4(a, 2);
+}
+
+// CHECK-LABEL: test_psll_s_i8x4_imm:
+// CHECK: pslli.b{{[[:space:]]+}}{{.*}}, 3
+int8x4_t test_psll_s_i8x4_imm(int8x4_t a) { return __riscv_psll_s_i8x4(a, 3); }
+
+// CHECK-LABEL: test_psll_s_u16x2_imm:
+// CHECK: pslli.h{{[[:space:]]+}}{{.*}}, 5
+uint16x2_t test_psll_s_u16x2_imm(uint16x2_t a) {
+ return __riscv_psll_s_u16x2(a, 5);
+}
+
+// CHECK-LABEL: test_psll_s_i16x2_imm:
+// CHECK: pslli.h{{[[:space:]]+}}{{.*}}, 7
+int16x2_t test_psll_s_i16x2_imm(int16x2_t a) {
+ return __riscv_psll_s_i16x2(a, 7);
+}
+
+// CHECK-LABEL: test_psrl_s_u8x4_imm:
+// CHECK: psrli.b{{[[:space:]]+}}{{.*}}, 2
+uint8x4_t test_psrl_s_u8x4_imm(uint8x4_t a) {
+ return __riscv_psrl_s_u8x4(a, 2);
+}
+
+// CHECK-LABEL: test_psrl_s_u16x2_imm:
+// CHECK: psrli.h{{[[:space:]]+}}{{.*}}, 3
+uint16x2_t test_psrl_s_u16x2_imm(uint16x2_t a) {
+ return __riscv_psrl_s_u16x2(a, 3);
+}
+
+// CHECK-LABEL: test_psra_s_i8x4_imm:
+// CHECK: psrai.b{{[[:space:]]+}}{{.*}}, 4
+int8x4_t test_psra_s_i8x4_imm(int8x4_t a) { return __riscv_psra_s_i8x4(a, 4); }
+
+// CHECK-LABEL: test_psra_s_i16x2_imm:
+// CHECK: psrai.h{{[[:space:]]+}}{{.*}}, 5
+int16x2_t test_psra_s_i16x2_imm(int16x2_t a) {
+ return __riscv_psra_s_i16x2(a, 5);
+}
+
+// CHECK-LABEL: test_psll_s_u8x8:
+// RV32: psll.dbs
+// RV64: psll.bs
+uint8x8_t test_psll_s_u8x8(uint8x8_t a, unsigned n) {
+ return __riscv_psll_s_u8x8(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i8x8:
+// RV32: psll.dbs
+// RV64: psll.bs
+int8x8_t test_psll_s_i8x8(int8x8_t a, unsigned n) {
+ return __riscv_psll_s_i8x8(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u16x4:
+// RV32: psll.dhs
+// RV64: psll.hs
+uint16x4_t test_psll_s_u16x4(uint16x4_t a, unsigned n) {
+ return __riscv_psll_s_u16x4(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i16x4:
+// RV32: psll.dhs
+// RV64: psll.hs
+int16x4_t test_psll_s_i16x4(int16x4_t a, unsigned n) {
+ return __riscv_psll_s_i16x4(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u32x2:
+// RV32: psll.dws
+// RV64: psll.ws
+uint32x2_t test_psll_s_u32x2(uint32x2_t a, unsigned n) {
+ return __riscv_psll_s_u32x2(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_i32x2:
+// RV32: psll.dws
+// RV64: psll.ws
+int32x2_t test_psll_s_i32x2(int32x2_t a, unsigned n) {
+ return __riscv_psll_s_i32x2(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u8x8:
+// RV32: psrl.dbs
+// RV64: psrl.bs
+uint8x8_t test_psrl_s_u8x8(uint8x8_t a, unsigned n) {
+ return __riscv_psrl_s_u8x8(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u16x4:
+// RV32: psrl.dhs
+// RV64: psrl.hs
+uint16x4_t test_psrl_s_u16x4(uint16x4_t a, unsigned n) {
+ return __riscv_psrl_s_u16x4(a, n);
+}
+
+// CHECK-LABEL: test_psrl_s_u32x2:
+// RV32: psrl.dws
+// RV64: psrl.ws
+uint32x2_t test_psrl_s_u32x2(uint32x2_t a, unsigned n) {
+ return __riscv_psrl_s_u32x2(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i8x8:
+// RV32: psra.dbs
+// RV64: psra.bs
+int8x8_t test_psra_s_i8x8(int8x8_t a, unsigned n) {
+ return __riscv_psra_s_i8x8(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i16x4:
+// RV32: psra.dhs
+// RV64: psra.hs
+int16x4_t test_psra_s_i16x4(int16x4_t a, unsigned n) {
+ return __riscv_psra_s_i16x4(a, n);
+}
+
+// CHECK-LABEL: test_psra_s_i32x2:
+// RV32: psra.dws
+// RV64: psra.ws
+int32x2_t test_psra_s_i32x2(int32x2_t a, unsigned n) {
+ return __riscv_psra_s_i32x2(a, n);
+}
+
+// CHECK-LABEL: test_psll_s_u8x8_imm:
+// RV32: pslli.db{{[[:space:]]+}}{{.*}}, 2
+// RV64: pslli.b{{[[:space:]]+}}{{.*}}, 2
+uint8x8_t test_psll_s_u8x8_imm(uint8x8_t a) {
+ return __riscv_psll_s_u8x8(a, 2);
+}
+
+// CHECK-LABEL: test_psll_s_i8x8_imm:
+// RV32: pslli.db{{[[:space:]]+}}{{.*}}, 3
+// RV64: pslli.b{{[[:space:]]+}}{{.*}}, 3
+int8x8_t test_psll_s_i8x8_imm(int8x8_t a) { return __riscv_psll_s_i8x8(a, 3); }
+
+// CHECK-LABEL: test_psll_s_u16x4_imm:
+// RV32: pslli.dh{{[[:space:]]+}}{{.*}}, 4
+// RV64: pslli.h{{[[:space:]]+}}{{.*}}, 4
+uint16x4_t test_psll_s_u16x4_imm(uint16x4_t a) {
+ return __riscv_psll_s_u16x4(a, 4);
+}
+
+// CHECK-LABEL: test_psll_s_i16x4_imm:
+// RV32: pslli.dh{{[[:space:]]+}}{{.*}}, 5
+// RV64: pslli.h{{[[:space:]]+}}{{.*}}, 5
+int16x4_t test_psll_s_i16x4_imm(int16x4_t a) {
+ return __riscv_psll_s_i16x4(a, 5);
+}
+
+// CHECK-LABEL: test_psll_s_u32x2_imm:
+// RV32: pslli.dw{{[[:space:]]+}}{{.*}}, 7
+// RV64: pslli.w{{[[:space:]]+}}{{.*}}, 7
+uint32x2_t test_psll_s_u32x2_imm(uint32x2_t a) {
+ return __riscv_psll_s_u32x2(a, 7);
+}
+
+// CHECK-LABEL: test_psll_s_i32x2_imm:
+// RV32: pslli.dw{{[[:space:]]+}}{{.*}}, 9
+// RV64: pslli.w{{[[:space:]]+}}{{.*}}, 9
+int32x2_t test_psll_s_i32x2_imm(int32x2_t a) {
+ return __riscv_psll_s_i32x2(a, 9);
+}
+
+// CHECK-LABEL: test_psrl_s_u8x8_imm:
+// RV32: psrli.db{{[[:space:]]+}}{{.*}}, 2
+// RV64: psrli.b{{[[:space:]]+}}{{.*}}, 2
+uint8x8_t test_psrl_s_u8x8_imm(uint8x8_t a) {
+ return __riscv_psrl_s_u8x8(a, 2);
+}
+
+// CHECK-LABEL: test_psrl_s_u16x4_imm:
+// RV32: psrli.dh{{[[:space:]]+}}{{.*}}, 3
+// RV64: psrli.h{{[[:space:]]+}}{{.*}}, 3
+uint16x4_t test_psrl_s_u16x4_imm(uint16x4_t a) {
+ return __riscv_psrl_s_u16x4(a, 3);
+}
+
+// CHECK-LABEL: test_psrl_s_u32x2_imm:
+// RV32: psrli.dw{{[[:space:]]+}}{{.*}}, 5
+// RV64: psrli.w{{[[:space:]]+}}{{.*}}, 5
+uint32x2_t test_psrl_s_u32x2_imm(uint32x2_t a) {
+ return __riscv_psrl_s_u32x2(a, 5);
+}
+
+// CHECK-LABEL: test_psra_s_i8x8_imm:
+// RV32: psrai.db{{[[:space:]]+}}{{.*}}, 4
+// RV64: psrai.b{{[[:space:]]+}}{{.*}}, 4
+int8x8_t test_psra_s_i8x8_imm(int8x8_t a) { return __riscv_psra_s_i8x8(a, 4); }
+
+// CHECK-LABEL: test_psra_s_i16x4_imm:
+// RV32: psrai.dh{{[[:space:]]+}}{{.*}}, 5
+// RV64: psrai.h{{[[:space:]]+}}{{.*}}, 5
+int16x4_t test_psra_s_i16x4_imm(int16x4_t a) {
+ return __riscv_psra_s_i16x4(a, 5);
+}
+
+// CHECK-LABEL: test_psra_s_i32x2_imm:
+// RV32: psrai.dw{{[[:space:]]+}}{{.*}}, 11
+// RV64: psrai.w{{[[:space:]]+}}{{.*}}, 11
+int32x2_t test_psra_s_i32x2_imm(int32x2_t a) {
+ return __riscv_psra_s_i32x2(a, 11);
+}
+
// CHECK-LABEL: test_pand_i8x4:
// CHECK: and{{[[:space:]]}}
int8x4_t test_pand_i8x4(int8x4_t a, int8x4_t b) {
>From a3cb4957e99a9659b8e9643c1d0a55a95bc9d4a6 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 9 Jun 2026 08:31:25 +0800
Subject: [PATCH 18/19] [Clang][RISCV] disable clang-format on packed macro call block
---
clang/lib/Headers/riscv_packed_simd.h | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h
index 1f4f33c5fafa1..a4667445d1b67 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -90,6 +90,9 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
__builtin_elementwise_add_sat(__rs1, __rs1), __rs2); \
}
+// clang-format off: macro call sites have no trailing semicolons, so
+// clang-format would misparse them as one deeply nested expression.
+
/* Packed Splat (32-bit) */
__packed_splat(pmv_s_u8x4, uint8x4_t, uint8_t, __packed_splat4)
__packed_splat(pmv_s_i8x4, int8x4_t, int8_t, __packed_splat4)
@@ -282,6 +285,8 @@ __packed_unary_op(pnot_u16x4, uint16x4_t, ~)
__packed_unary_op(pnot_i32x2, int32x2_t, ~)
__packed_unary_op(pnot_u32x2, uint32x2_t, ~)
+// clang-format on
+
#undef __packed_splat2
#undef __packed_splat4
#undef __packed_splat8
>From 737c111a7276805190e38fea86a11e8e4466c2dd Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Tue, 9 Jun 2026 08:54:04 +0800
Subject: [PATCH 19/19] clang-format
---
clang/lib/Headers/riscv_packed_simd.h | 22 ++++++++-----------
.../riscv_packed_simd.c | 6 +++--
2 files changed, 13 insertions(+), 15 deletions(-)
diff --git a/clang/lib/Headers/riscv_packed_simd.h b/clang/lib/Headers/riscv_packed_simd.h
index a4667445d1b67..828cb90f8034a 100644
--- a/clang/lib/Headers/riscv_packed_simd.h
+++ b/clang/lib/Headers/riscv_packed_simd.h
@@ -42,23 +42,22 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
}
#define __packed_shift(name, ty, op, mask) \
- static __inline__ ty __DEFAULT_FN_ATTRS \
- __riscv_##name(ty __rs1, unsigned __rs2) { \
- return __rs1 op (__rs2 & (mask)); \
+ static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
+ unsigned __rs2) { \
+ return __rs1 op(__rs2 & (mask)); \
}
#define __packed_shift8(name, ty, op) __packed_shift(name, ty, op, 0x7)
#define __packed_shift16(name, ty, op) __packed_shift(name, ty, op, 0xf)
#define __packed_shift32(name, ty, op) __packed_shift(name, ty, op, 0x1f)
#define __packed_scalar_binary_op(name, ty, scalar_ty, op, splat) \
- static __inline__ ty __DEFAULT_FN_ATTRS \
- __riscv_##name(ty __rs1, scalar_ty __rs2) { \
+ static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, \
+ scalar_ty __rs2) { \
return __rs1 op splat(ty, __rs2); \
}
#define __packed_binary_op(name, ty, op) \
- static __inline__ ty __DEFAULT_FN_ATTRS \
- __riscv_##name(ty __rs1, ty __rs2) { \
+ static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
return __rs1 op __rs2; \
}
@@ -68,14 +67,12 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
}
#define __packed_binary_builtin(name, ty, builtin) \
- static __inline__ ty __DEFAULT_FN_ATTRS \
- __riscv_##name(ty __rs1, ty __rs2) { \
+ static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
return builtin(__rs1, __rs2); \
}
#define __packed_sh1add(name, ty) \
- static __inline__ ty __DEFAULT_FN_ATTRS \
- __riscv_##name(ty __rs1, ty __rs2) { \
+ static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
return (__rs1 << 1) + __rs2; \
}
@@ -84,8 +81,7 @@ typedef uint32_t uint32x2_t __attribute__((__vector_size__(8)));
* for signed types and the backend's saturating_shl1 PatFrags matches both
* shapes. */
#define __packed_sh1sadd(name, ty) \
- static __inline__ ty __DEFAULT_FN_ATTRS \
- __riscv_##name(ty __rs1, ty __rs2) { \
+ static __inline__ ty __DEFAULT_FN_ATTRS __riscv_##name(ty __rs1, ty __rs2) { \
return __builtin_elementwise_add_sat( \
__builtin_elementwise_add_sat(__rs1, __rs1), __rs2); \
}
diff --git a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
index 5e9afc3cd5f99..a2c4b83360207 100644
--- a/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
+++ b/cross-project-tests/intrinsic-header-tests/riscv_packed_simd.c
@@ -2,10 +2,12 @@
// expected-no-diagnostics
// RUN: %clang %s -O2 -S -o - --target=riscv32 \
-// RUN: -menable-experimental-extensions -march=rv32i_p0p21 -Werror -Wextra -Xclang -verify \
+// RUN: -menable-experimental-extensions -march=rv32i_p0p21 \
+// RUN: -Werror -Wextra -Xclang -verify \
// RUN: | FileCheck %s --check-prefixes=CHECK,RV32
// RUN: %clang %s -O2 -S -o - --target=riscv64 \
-// RUN: -menable-experimental-extensions -march=rv64i_p0p21 -Werror -Wextra -Xclang -verify \
+// RUN: -menable-experimental-extensions -march=rv64i_p0p21 \
+// RUN: -Werror -Wextra -Xclang -verify \
// RUN: | FileCheck %s --check-prefixes=CHECK,RV64
#include <riscv_packed_simd.h>
More information about the cfe-commits mailing list