[llvm] [RISCV][llvm] Preliminary P extension codegen support (PR #162668)
Brandon Wu via llvm-commits
llvm-commits at lists.llvm.org
Mon Nov 3 23:08:53 PST 2025
https://github.com/4vtomat updated https://github.com/llvm/llvm-project/pull/162668
>From ef7900d18bcf1ffaa68336d741a4f38672b482cc Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Wed, 8 Oct 2025 21:11:23 -0700
Subject: [PATCH 01/13] [RISCV][llvm] Preliminary P extension codegen support
This is the initial P extension codegen support; it only covers a small
subset of the instructions:
PADD_H, PADD_B,
PSADD_H, PSADD_B,
PAADD_H, PAADD_B,
PSADDU_H, PSADDU_B,
PAADDU_H, PAADDU_B,
PSUB_H, PSUB_B,
PDIF_H, PDIF_B,
PSSUB_H, PSSUB_B,
PASUB_H, PASUB_B,
PDIFU_H, PDIFU_B,
PSSUBU_H, PSSUBU_B,
PASUBU_H, PASUBU_B
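
As a quick illustration of the effect (a minimal sketch mirroring the new tests
added below, assuming riscv64 with -mattr=+experimental-p), a packed 16-bit add
on a <4 x i16> held in a single GPR is expected to select to padd.h:

  ; llc -mtriple=riscv64 -mattr=+experimental-p
  define void @example_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
    %a = load <4 x i16>, ptr %a_ptr     ; whole vector loaded into one GPR (ld)
    %b = load <4 x i16>, ptr %b_ptr
    %res = add <4 x i16> %a, %b         ; selected to padd.h
    store <4 x i16> %res, ptr %ret_ptr  ; stored with a single sd
    ret void
  }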
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 29 ++
llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 124 +++++
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 6 +-
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 21 +
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 426 +++++++++++++++++
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 430 ++++++++++++++++++
6 files changed, 1035 insertions(+), 1 deletion(-)
create mode 100644 llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
create mode 100644 llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7123a2d706787..1eb8c9457ee6a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -279,6 +279,17 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
addRegisterClass(MVT::riscv_nxv32i8x2, &RISCV::VRN2M4RegClass);
}
+ // Fixed vectors are stored in GPRs for P extension packed operations
+ if (Subtarget.hasStdExtP()) {
+ addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
+ addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
+ if (Subtarget.is64Bit()) {
+ addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
+ addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
+ addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
+ }
+ }
+
// Compute derived properties from the register classes.
computeRegisterProperties(STI.getRegisterInfo());
@@ -479,6 +490,24 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FCANONICALIZE};
+ if (Subtarget.hasStdExtP()) {
+ // Loads/stores are already handled by pattern matching
+ SmallVector<MVT, 2> VTs = {MVT::v2i16, MVT::v4i8};
+ if (Subtarget.is64Bit())
+ VTs.append({MVT::v2i32, MVT::v4i16, MVT::v8i8});
+ for (auto VT : VTs) {
+ setOperationAction(ISD::UADDSAT, VT, Legal);
+ setOperationAction(ISD::SADDSAT, VT, Legal);
+ setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::SSUBSAT, VT, Legal);
+ setOperationAction(ISD::SSHLSAT, VT, Legal);
+ setOperationAction(ISD::USHLSAT, VT, Legal);
+ setOperationAction(ISD::BITCAST, VT, Custom);
+ setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, VT, Legal);
+ setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Legal);
+ }
+ }
+
if (Subtarget.hasStdExtZfbfmin()) {
setOperationAction(ISD::BITCAST, MVT::i16, Custom);
setOperationAction(ISD::ConstantFP, MVT::bf16, Expand);
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 7d8a9192d9847..c5e2f12aafb1e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1455,3 +1455,127 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PMAXU_DW : RVPPairBinaryExchanged_rr<0b1111, 0b01, "pmaxu.dw">;
def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">;
} // Predicates = [HasStdExtP, IsRV32]
+
+let Predicates = [HasStdExtP, IsRV64] in {
+ // Basic arithmetic patterns for v4i16 (16-bit elements in 64-bit GPR)
+ def: Pat<(v4i16 (add v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (sub v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSUB_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Saturating add/sub patterns for v4i16
+ def: Pat<(v4i16 (saddsat v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (uaddsat v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSADDU_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (ssubsat v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSSUB_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (usubsat v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSSUBU_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Averaging patterns for v4i16
+ def: Pat<(v4i16 (avgfloors v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PAADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (avgflooru v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PAADDU_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Averaging subtraction patterns for v4i16
+ // PASUB_H: signed (a - b) >> 1
+ def: Pat<(v4i16 (sra (sub v4i16:$rs1, v4i16:$rs2), (v4i16 (build_vector (XLenVT 1))))),
+ (!cast<Instruction>("PASUB_H") GPR:$rs1, GPR:$rs2)>;
+ // PASUBU_H: unsigned (a - b) >> 1
+ def: Pat<(v4i16 (srl (sub v4i16:$rs1, v4i16:$rs2), (v4i16 (build_vector (XLenVT 1))))),
+ (!cast<Instruction>("PASUBU_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Absolute difference patterns for v4i16
+ def: Pat<(v4i16 (abds v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PDIF_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (abdu v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PDIFU_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Basic arithmetic patterns for v8i8 (8-bit elements in 64-bit GPR)
+ def: Pat<(v8i8 (add v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (sub v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSUB_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Saturating add/sub patterns for v8i8
+ def: Pat<(v8i8 (saddsat v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (uaddsat v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSADDU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (ssubsat v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSSUB_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (usubsat v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSSUBU_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Averaging patterns for v8i8
+ def: Pat<(v8i8 (avgfloors v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PAADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (avgflooru v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PAADDU_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Averaging subtraction patterns for v8i8
+ // PASUB_B: signed (a - b) >> 1
+ def: Pat<(v8i8 (sra (sub v8i8:$rs1, v8i8:$rs2), (v8i8 (build_vector (XLenVT 1))))),
+ (!cast<Instruction>("PASUB_B") GPR:$rs1, GPR:$rs2)>;
+ // PASUBU_B: unsigned (a - b) >> 1
+ def: Pat<(v8i8 (srl (sub v8i8:$rs1, v8i8:$rs2), (v8i8 (build_vector (XLenVT 1))))),
+ (!cast<Instruction>("PASUBU_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Absolute difference patterns for v8i8
+ def: Pat<(v8i8 (abds v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PDIF_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (abdu v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PDIFU_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Load/Store patterns for v4i16 and v8i8 (use regular GPR load/store since they're in GPRs)
+ def : StPat<store, SD, GPR, v4i16>;
+ def : LdPat<load, LD, v4i16>;
+ def : StPat<store, SD, GPR, v8i8>;
+ def : LdPat<load, LD, v8i8>;
+
+ // Load/Store patterns for v2i32 (32-bit elements in 64-bit GPR)
+ def : StPat<store, SD, GPR, v2i32>;
+ def : LdPat<load, LD, v2i32>;
+} // Predicates = [HasStdExtP, IsRV64]
+
+let Predicates = [HasStdExtP, IsRV32] in {
+ // Basic arithmetic patterns for v2i16 (16-bit elements in 32-bit GPR)
+ def: Pat<(v2i16 (add v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (sub v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSUB_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Saturating add/sub patterns for v2i16
+ def: Pat<(v2i16 (saddsat v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (uaddsat v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSADDU_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (ssubsat v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSSUB_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (usubsat v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSSUBU_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Averaging patterns for v2i16
+ def: Pat<(v2i16 (avgfloors v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PAADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (avgflooru v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PAADDU_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Averaging subtraction patterns for v2i16
+ // PASUB_H: signed (a - b) >> 1
+ def: Pat<(v2i16 (sra (sub v2i16:$rs1, v2i16:$rs2), (v2i16 (build_vector (XLenVT 1))))),
+ (!cast<Instruction>("PASUB_H") GPR:$rs1, GPR:$rs2)>;
+ // PASUBU_H: unsigned (a - b) >> 1
+ def: Pat<(v2i16 (srl (sub v2i16:$rs1, v2i16:$rs2), (v2i16 (build_vector (XLenVT 1))))),
+ (!cast<Instruction>("PASUBU_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Absolute difference patterns for v2i16
+ def: Pat<(v2i16 (abds v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PDIF_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (abdu v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PDIFU_H") GPR:$rs1, GPR:$rs2)>;
+
+ // Basic arithmetic patterns for v4i8 (8-bit elements in 32-bit GPR)
+ def: Pat<(v4i8 (add v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (sub v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSUB_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Saturating add/sub patterns for v4i8
+ def: Pat<(v4i8 (saddsat v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (uaddsat v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSADDU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (ssubsat v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSSUB_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (usubsat v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSSUBU_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Averaging patterns for v4i8
+ def: Pat<(v4i8 (avgfloors v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PAADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (avgflooru v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PAADDU_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Averaging subtraction patterns for v4i8
+ // PASUB_B: signed (a - b) >> 1
+ def: Pat<(v4i8 (sra (sub v4i8:$rs1, v4i8:$rs2), (v4i8 (build_vector (XLenVT 1))))),
+ (!cast<Instruction>("PASUB_B") GPR:$rs1, GPR:$rs2)>;
+ // PASUBU_B: unsigned (a - b) >> 1
+ def: Pat<(v4i8 (srl (sub v4i8:$rs1, v4i8:$rs2), (v4i8 (build_vector (XLenVT 1))))),
+ (!cast<Instruction>("PASUBU_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Absolute difference patterns for v4i8
+ def: Pat<(v4i8 (abds v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PDIF_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (abdu v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PDIFU_B") GPR:$rs1, GPR:$rs2)>;
+
+ // Load/Store patterns for v2i16 and v4i8 (use regular GPR load/store since they're in GPRs)
+ def : StPat<store, SW, GPR, v2i16>;
+ def : LdPat<load, LW, v2i16>;
+ def : StPat<store, SW, GPR, v4i8>;
+ def : LdPat<load, LW, v4i8>;
+} // Predicates = [HasStdExtP, IsRV32]
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index 6605a5ccdfde2..fcbb93a55375b 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -238,7 +238,11 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList>
}
class GPRRegisterClass<dag regList>
- : RISCVRegisterClass<[XLenVT, XLenFVT], 32, regList> {
+ : RISCVRegisterClass<[XLenVT, XLenFVT,
+ // P extension packed vector types:
+ // RV32: v2i16, v4i8
+ // RV64: v2i32, v4i16, v8i8
+ v2i16, v4i8, v2i32, v4i16, v8i8], 32, regList> {
let RegInfos = XLenRI;
}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index 7bc0b5b394828..e669175a3d8e1 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -969,6 +969,13 @@ InstructionCost RISCVTTIImpl::getScalarizationOverhead(
if (isa<ScalableVectorType>(Ty))
return InstructionCost::getInvalid();
+ // TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
+ // For now, skip all fixed vector cost analysis when P extension is available
+ // to avoid crashes in getMinRVVVectorSizeInBits()
+ if (ST->hasStdExtP() && isa<FixedVectorType>(Ty)) {
+ return 1; // Treat as single instruction cost for now
+ }
+
// A build_vector (which is m1 sized or smaller) can be done in no
// worse than one vslide1down.vx per element in the type. We could
// in theory do an explode_vector in the inverse manner, but our
@@ -1625,6 +1632,13 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
if (!IsVectorType)
return BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
+ // TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
+ // For now, skip all fixed vector cost analysis when P extension is available
+ // to avoid crashes in getMinRVVVectorSizeInBits()
+ if (ST->hasStdExtP() && (isa<FixedVectorType>(Dst) || isa<FixedVectorType>(Src))) {
+ return 1; // Treat as single instruction cost for now
+ }
+
// FIXME: Need to compute legalizing cost for illegal types. The current
// code handles only legal types and those which can be trivially
// promoted to legal.
@@ -2321,6 +2335,13 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
const Value *Op1) const {
assert(Val->isVectorTy() && "This must be a vector type");
+ // TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
+ // For now, skip all fixed vector cost analysis when P extension is available
+ // to avoid crashes in getMinRVVVectorSizeInBits()
+ if (ST->hasStdExtP() && isa<FixedVectorType>(Val)) {
+ return 1; // Treat as single instruction cost for now
+ }
+
if (Opcode != Instruction::ExtractElement &&
Opcode != Instruction::InsertElement)
return BaseT::getVectorInstrCost(Opcode, Val, CostKind, Index, Op0, Op1);
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
new file mode 100644
index 0000000000000..8a4ab1d545f41
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -0,0 +1,426 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s | FileCheck %s
+
+; Test basic add/sub operations for v2i16
+define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_padd_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: padd.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = add <2 x i16> %a, %b
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_psub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psub_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: psub.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = sub <2 x i16> %a, %b
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test basic add/sub operations for v4i8
+define void @test_padd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_padd_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: padd.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = add <4 x i8> %a, %b
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_psub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psub_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: psub.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = sub <4 x i8> %a, %b
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test saturating add operations for v2i16
+define void @test_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psadd_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: psadd.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %a, <2 x i16> %b)
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_psaddu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psaddu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: psaddu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = call <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16> %a, <2 x i16> %b)
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test saturating sub operations for v2i16
+define void @test_pssub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssub_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pssub.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = call <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16> %a, <2 x i16> %b)
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_pssubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssubu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pssubu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %res = call <2 x i16> @llvm.usub.sat.v2i16(<2 x i16> %a, <2 x i16> %b)
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test saturating add operations for v4i8
+define void @test_psadd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psadd_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: psadd.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = call <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8> %a, <4 x i8> %b)
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_psaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psaddu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: psaddu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %a, <4 x i8> %b)
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test saturating sub operations for v4i8
+define void @test_pssub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssub_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pssub.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = call <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8> %a, <4 x i8> %b)
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_pssubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssubu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pssubu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %res = call <4 x i8> @llvm.usub.sat.v4i8(<4 x i8> %a, <4 x i8> %b)
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor signed operations for v2i16
+define void @test_paadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paadd_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: paadd.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %ext.a = sext <2 x i16> %a to <2 x i32>
+ %ext.b = sext <2 x i16> %b to <2 x i32>
+ %add = add nsw <2 x i32> %ext.a, %ext.b
+ %shift = ashr <2 x i32> %add, <i32 1, i32 1>
+ %res = trunc <2 x i32> %shift to <2 x i16>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor unsigned operations for v2i16
+define void @test_paaddu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paaddu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: paaddu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %and = and <2 x i16> %a, %b
+ %xor = xor <2 x i16> %a, %b
+ %shift = lshr <2 x i16> %xor, <i16 1, i16 1>
+ %res = add <2 x i16> %and, %shift
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor signed operations for v4i8
+define void @test_paadd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paadd_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: paadd.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %ext.a = sext <4 x i8> %a to <4 x i16>
+ %ext.b = sext <4 x i8> %b to <4 x i16>
+ %add = add nsw <4 x i16> %ext.a, %ext.b
+ %shift = ashr <4 x i16> %add, <i16 1, i16 1, i16 1, i16 1>
+ %res = trunc <4 x i16> %shift to <4 x i8>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor unsigned operations for v4i8
+define void @test_paaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paaddu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: paaddu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %and = and <4 x i8> %a, %b
+ %xor = xor <4 x i8> %a, %b
+ %shift = lshr <4 x i8> %xor, <i8 1, i8 1, i8 1, i8 1>
+ %res = add <4 x i8> %and, %shift
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test absolute difference signed for v2i16
+define void @test_pdif_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pdif_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pdif.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %a, <2 x i16> %b)
+ %max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> %b)
+ %res = sub <2 x i16> %max, %min
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test absolute difference unsigned for v2i16
+define void @test_pdifu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pdifu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pdifu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %min = call <2 x i16> @llvm.umin.v2i16(<2 x i16> %a, <2 x i16> %b)
+ %max = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %a, <2 x i16> %b)
+ %res = sub <2 x i16> %max, %min
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test absolute difference signed for v4i8
+define void @test_pdif_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pdif_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pdif.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %min = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %a, <4 x i8> %b)
+ %max = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %a, <4 x i8> %b)
+ %res = sub <4 x i8> %max, %min
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test absolute difference unsigned for v4i8
+define void @test_pdifu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pdifu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pdifu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %min = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %a, <4 x i8> %b)
+ %max = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %a, <4 x i8> %b)
+ %res = sub <4 x i8> %max, %min
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor subtraction signed for v2i16
+; pasub pattern: (a - b) arithmetic shift right 1
+define void @test_pasub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasub_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pasub.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %sub = sub <2 x i16> %a, %b
+ %res = ashr <2 x i16> %sub, <i16 1, i16 1>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor subtraction unsigned for v2i16
+; pasubu pattern: (a - b) logical shift right 1
+define void @test_pasubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasubu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pasubu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %sub = sub <2 x i16> %a, %b
+ %res = lshr <2 x i16> %sub, <i16 1, i16 1>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor subtraction signed for v4i8
+define void @test_pasub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasub_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pasub.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %sub = sub <4 x i8> %a, %b
+ %res = ashr <4 x i8> %sub, <i8 1, i8 1, i8 1, i8 1>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor subtraction unsigned for v4i8
+define void @test_pasubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasubu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pasubu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %sub = sub <4 x i8> %a, %b
+ %res = lshr <4 x i8> %sub, <i8 1, i8 1, i8 1, i8 1>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Intrinsic declarations
+declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.ssub.sat.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.usub.sat.v2i16(<2 x i16>, <2 x i16>)
+declare <4 x i8> @llvm.sadd.sat.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.ssub.sat.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.usub.sat.v4i8(<4 x i8>, <4 x i8>)
+declare <2 x i16> @llvm.smin.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.smax.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.umin.v2i16(<2 x i16>, <2 x i16>)
+declare <2 x i16> @llvm.umax.v2i16(<2 x i16>, <2 x i16>)
+declare <4 x i8> @llvm.smin.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.smax.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.umin.v4i8(<4 x i8>, <4 x i8>)
+declare <4 x i8> @llvm.umax.v4i8(<4 x i8>, <4 x i8>)
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
new file mode 100644
index 0000000000000..d4918e4e0aa62
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -0,0 +1,430 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s | FileCheck %s
+
+; Test basic add/sub operations for v4i16
+define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_padd_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: padd.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = add <4 x i16> %a, %b
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_psub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psub_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: psub.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = sub <4 x i16> %a, %b
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test basic add/sub operations for v8i8
+define void @test_padd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_padd_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: padd.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = add <8 x i8> %a, %b
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_psub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psub_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: psub.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = sub <8 x i8> %a, %b
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test saturating add operations for v4i16
+define void @test_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psadd_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: psadd.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = call <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_psaddu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psaddu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: psaddu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = call <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test saturating sub operations for v4i16
+define void @test_pssub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssub_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pssub.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = call <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_pssubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssubu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pssubu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %res = call <4 x i16> @llvm.usub.sat.v4i16(<4 x i16> %a, <4 x i16> %b)
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test saturating add operations for v8i8
+define void @test_psadd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psadd_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: psadd.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = call <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_psaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_psaddu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: psaddu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = call <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test saturating sub operations for v8i8
+define void @test_pssub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssub_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pssub.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = call <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+define void @test_pssubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pssubu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pssubu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %res = call <8 x i8> @llvm.usub.sat.v8i8(<8 x i8> %a, <8 x i8> %b)
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor signed operations for v4i16
+; avgfloors pattern: (a + b) arithmetic shift right 1
+define void @test_paadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paadd_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: paadd.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %ext.a = sext <4 x i16> %a to <4 x i32>
+ %ext.b = sext <4 x i16> %b to <4 x i32>
+ %add = add nsw <4 x i32> %ext.a, %ext.b
+ %shift = ashr <4 x i32> %add, <i32 1, i32 1, i32 1, i32 1>
+ %res = trunc <4 x i32> %shift to <4 x i16>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor unsigned operations for v4i16
+; avgflooru pattern: (a & b) + ((a ^ b) >> 1)
+define void @test_paaddu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paaddu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: paaddu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %and = and <4 x i16> %a, %b
+ %xor = xor <4 x i16> %a, %b
+ %shift = lshr <4 x i16> %xor, <i16 1, i16 1, i16 1, i16 1>
+ %res = add <4 x i16> %and, %shift
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor signed operations for v8i8
+define void @test_paadd_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paadd_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: paadd.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %ext.a = sext <8 x i8> %a to <8 x i16>
+ %ext.b = sext <8 x i8> %b to <8 x i16>
+ %add = add nsw <8 x i16> %ext.a, %ext.b
+ %shift = ashr <8 x i16> %add, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %res = trunc <8 x i16> %shift to <8 x i8>
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor unsigned operations for v8i8
+define void @test_paaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_paaddu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: paaddu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %and = and <8 x i8> %a, %b
+ %xor = xor <8 x i8> %a, %b
+ %shift = lshr <8 x i8> %xor, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %res = add <8 x i8> %and, %shift
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test absolute difference signed for v4i16
+; abds pattern: sub(smax(a,b), smin(a,b))
+define void @test_pdif_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pdif_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pdif.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %min = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b)
+ %max = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b)
+ %res = sub <4 x i16> %max, %min
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test absolute difference unsigned for v4i16
+; abdu pattern: sub(umax(a,b), umin(a,b))
+define void @test_pdifu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pdifu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pdifu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %min = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b)
+ %max = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b)
+ %res = sub <4 x i16> %max, %min
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test absolute difference signed for v8i8
+define void @test_pdif_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pdif_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pdif.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %min = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b)
+ %max = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b)
+ %res = sub <8 x i8> %max, %min
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test absolute difference unsigned for v8i8
+define void @test_pdifu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pdifu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pdifu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %min = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b)
+ %max = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b)
+ %res = sub <8 x i8> %max, %min
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor subtraction signed for v4i16
+; pasub pattern: (a - b) arithmetic shift right 1
+define void @test_pasub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasub_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pasub.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %sub = sub <4 x i16> %a, %b
+ %res = ashr <4 x i16> %sub, <i16 1, i16 1, i16 1, i16 1>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor subtraction unsigned for v4i16
+; pasubu pattern: (a - b) logical shift right 1
+define void @test_pasubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasubu_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pasubu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %sub = sub <4 x i16> %a, %b
+ %res = lshr <4 x i16> %sub, <i16 1, i16 1, i16 1, i16 1>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor subtraction signed for v8i8
+define void @test_pasub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasub_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pasub.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %sub = sub <8 x i8> %a, %b
+ %res = ashr <8 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test averaging floor subtraction unsigned for v8i8
+define void @test_pasubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_pasubu_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pasubu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %sub = sub <8 x i8> %a, %b
+ %res = lshr <8 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ store <8 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
+; Intrinsic declarations
+declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.ssub.sat.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.usub.sat.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i8> @llvm.sadd.sat.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.uadd.sat.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.ssub.sat.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.usub.sat.v8i8(<8 x i8>, <8 x i8>)
+declare <4 x i16> @llvm.smin.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.smax.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.umin.v4i16(<4 x i16>, <4 x i16>)
+declare <4 x i16> @llvm.umax.v4i16(<4 x i16>, <4 x i16>)
+declare <8 x i8> @llvm.smin.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.smax.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.umin.v8i8(<8 x i8>, <8 x i8>)
+declare <8 x i8> @llvm.umax.v8i8(<8 x i8>, <8 x i8>)
>From bc91902b4aae954be8efd5b505decc0b5570bf35 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Wed, 15 Oct 2025 02:05:07 -0700
Subject: [PATCH 02/13] fixup! fix pasub, add BUILD_VECTOR, fix comments
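
For reference, a minimal IR sketch (illustrative, not taken verbatim from this
patch) of the widened averaging-subtraction shape that the new
combinePExtTruncate DAG combine below is meant to recognize; with riscv64 and
-mattr=+experimental-p the zero-extended form is expected to select to
pasubu.h, and the sign-extended form to pasub.h:

  define void @example_pasubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
    %a = load <4 x i16>, ptr %a_ptr
    %b = load <4 x i16>, ptr %b_ptr
    ; (trunc (srl (sub (zext a), (zext b)), 1)) -> PASUBU_H
    %ext.a = zext <4 x i16> %a to <4 x i32>
    %ext.b = zext <4 x i16> %b to <4 x i32>
    %sub = sub <4 x i32> %ext.a, %ext.b
    %shift = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
    %res = trunc <4 x i32> %shift to <4 x i16>
    store <4 x i16> %res, ptr %ret_ptr
    ret void
  }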
---
.../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 1 +
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 126 ++++++++++++++-
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 8 +
llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 146 ++++++++----------
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 3 +-
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 58 +++++--
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 70 +++++++--
7 files changed, 303 insertions(+), 109 deletions(-)
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 21dbb7cbc9844..a50e19a85263f 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -809,6 +809,7 @@ struct RISCVOperand final : public MCParsedAsmOperand {
bool isSImm5() const { return isSImm<5>(); }
bool isSImm6() const { return isSImm<6>(); }
+ bool isSImm8() const { return isSImm<8>(); }
bool isSImm10() const { return isSImm<10>(); }
bool isSImm11() const { return isSImm<11>(); }
bool isSImm12() const { return isSImm<12>(); }
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 1eb8c9457ee6a..33c0e6bc66ad9 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -491,11 +491,19 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FROUNDEVEN, ISD::FCANONICALIZE};
if (Subtarget.hasStdExtP()) {
+ setTargetDAGCombine(ISD::TRUNCATE);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
+ setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
+ // Loads/stores are already handled by pattern matching
SmallVector<MVT, 2> VTs = {MVT::v2i16, MVT::v4i8};
- if (Subtarget.is64Bit())
+ if (Subtarget.is64Bit()) {
VTs.append({MVT::v2i32, MVT::v4i16, MVT::v8i8});
+ setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
+ setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
+ setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
+ }
for (auto VT : VTs) {
+ setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::SADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
@@ -4340,6 +4348,34 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
MVT XLenVT = Subtarget.getXLenVT();
SDLoc DL(Op);
+ // Handle P extension packed vector BUILD_VECTOR with PLI for splat constants
+ if (Subtarget.hasStdExtP()) {
+ bool IsPExtVector =
+ (VT == MVT::v2i16 || VT == MVT::v4i8) ||
+ (Subtarget.is64Bit() &&
+ (VT == MVT::v4i16 || VT == MVT::v8i8 || VT == MVT::v2i32));
+ if (IsPExtVector) {
+ if (SDValue SplatValue = cast<BuildVectorSDNode>(Op)->getSplatValue()) {
+ if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
+ int64_t SplatImm = C->getSExtValue();
+ bool IsValidImm = false;
+
+ // Check immediate range based on vector type
+ if (VT == MVT::v8i8 || VT == MVT::v4i8)
+ // PLI_B uses 8-bit unsigned immediate
+ IsValidImm = isUInt<8>(SplatImm);
+ else
+ // PLI_H and PLI_W use 10-bit signed immediate
+ IsValidImm = isInt<10>(SplatImm);
+
+ if (IsValidImm) {
+ SDValue Imm = DAG.getConstant(SplatImm, DL, XLenVT);
+ return DAG.getNode(RISCVISD::PLI, DL, VT, Imm);
+ }
+ }
+ }
+ }
+ }
// Proper support for f16 requires Zvfh. bf16 always requires special
// handling. We need to cast the scalar to integer and create an integer
@@ -16025,11 +16061,99 @@ static SDValue combineTruncSelectToSMaxUSat(SDNode *N, SelectionDAG &DAG) {
return DAG.getNode(ISD::TRUNCATE, DL, VT, Min);
}
+// Handle P extension averaging subtraction pattern:
+// (vXiY (trunc (srl (sub ([s|z]ext vXiY:$a), ([s|z]ext vXiY:$b)), 1)))
+// -> PASUB/PASUBU
+static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG,
+ const RISCVSubtarget &Subtarget) {
+ SDValue N0 = N->getOperand(0);
+ EVT VT = N->getValueType(0);
+ if (!Subtarget.hasStdExtP() || !VT.isFixedLengthVector())
+ return SDValue();
+
+ if (N0.getOpcode() != ISD::SRL)
+ return SDValue();
+
+ // Check if shift amount is 1
+ SDValue ShAmt = N0.getOperand(1);
+ if (ShAmt.getOpcode() != ISD::BUILD_VECTOR)
+ return SDValue();
+
+ BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(ShAmt.getNode());
+ if (!BV)
+ return SDValue();
+ SDValue Splat = BV->getSplatValue();
+ if (!Splat)
+ return SDValue();
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Splat);
+ if (!C)
+ return SDValue();
+ if (C->getZExtValue() != 1)
+ return SDValue();
+
+ // Check for SUB operation
+ SDValue Sub = N0.getOperand(0);
+ if (Sub.getOpcode() != ISD::SUB)
+ return SDValue();
+
+ SDValue LHS = Sub.getOperand(0);
+ SDValue RHS = Sub.getOperand(1);
+
+ // Check if both operands are sign/zero extends from the target
+ // type
+ bool IsSignExt = LHS.getOpcode() == ISD::SIGN_EXTEND &&
+ RHS.getOpcode() == ISD::SIGN_EXTEND;
+ bool IsZeroExt = LHS.getOpcode() == ISD::ZERO_EXTEND &&
+ RHS.getOpcode() == ISD::ZERO_EXTEND;
+
+ if (!IsSignExt && !IsZeroExt)
+ return SDValue();
+
+ SDValue A = LHS.getOperand(0);
+ SDValue B = RHS.getOperand(0);
+
+ // Check if the extends are from our target vector type
+ if (A.getValueType() != VT || B.getValueType() != VT)
+ return SDValue();
+
+ // Determine the instruction based on type and signedness
+ unsigned Opc;
+ MVT VecVT = VT.getSimpleVT();
+ if (VecVT == MVT::v4i16 && IsSignExt)
+ Opc = RISCV::PASUB_H;
+ else if (VecVT == MVT::v4i16 && IsZeroExt)
+ Opc = RISCV::PASUBU_H;
+ else if (VecVT == MVT::v2i16 && IsSignExt)
+ Opc = RISCV::PASUB_H;
+ else if (VecVT == MVT::v2i16 && IsZeroExt)
+ Opc = RISCV::PASUBU_H;
+ else if (VecVT == MVT::v8i8 && IsSignExt)
+ Opc = RISCV::PASUB_B;
+ else if (VecVT == MVT::v8i8 && IsZeroExt)
+ Opc = RISCV::PASUBU_B;
+ else if (VecVT == MVT::v4i8 && IsSignExt)
+ Opc = RISCV::PASUB_B;
+ else if (VecVT == MVT::v4i8 && IsZeroExt)
+ Opc = RISCV::PASUBU_B;
+ else if (VecVT == MVT::v2i32 && IsSignExt)
+ Opc = RISCV::PASUB_W;
+ else if (VecVT == MVT::v2i32 && IsZeroExt)
+ Opc = RISCV::PASUBU_W;
+ else
+ return SDValue();
+
+ // Create the machine node directly
+ return SDValue(DAG.getMachineNode(Opc, SDLoc(N), VT, {A, B}), 0);
+}
+
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
+ if (Subtarget.hasStdExtP() && VT.isFixedLengthVector())
+ return combinePExtTruncate(N, DAG, Subtarget);
+
// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
// extending X. This is safe since we only need the LSB after the shift and
// shift amounts larger than 31 would produce poison. If we wait until
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index 96e1078467f19..7b143311d27e0 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2895,6 +2895,12 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_UIMM9_LSB000:
Ok = isShiftedUInt<6, 3>(Imm);
break;
+ case RISCVOp::OPERAND_SIMM8_UNSIGNED:
+ Ok = isInt<8>(Imm);
+ break;
+ case RISCVOp::OPERAND_SIMM10_UNSIGNED:
+ Ok = isInt<10>(Imm);
+ break;
case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
break;
@@ -2916,6 +2922,8 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
// clang-format off
CASE_OPERAND_SIMM(5)
CASE_OPERAND_SIMM(6)
+ CASE_OPERAND_SIMM(8)
+ CASE_OPERAND_SIMM(10)
CASE_OPERAND_SIMM(11)
CASE_OPERAND_SIMM(12)
CASE_OPERAND_SIMM(26)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index c5e2f12aafb1e..e6d4aa8070e0a 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -18,7 +18,7 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def simm10 : RISCVSImmOp<10>;
+def simm10 : RISCVSImmLeafOp<10>;
def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> {
let RenderMethod = "addSImm8UnsignedOperands";
@@ -26,7 +26,7 @@ def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> {
// A 8-bit signed immediate allowing range [-128, 255]
// but represented as [-128, 127].
-def simm8_unsigned : RISCVOp {
+def simm8_unsigned : RISCVSImmLeafOp<8> {
let ParserMatchClass = SImm8UnsignedAsmOperand;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmOperand<8>";
@@ -1456,59 +1456,49 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">;
} // Predicates = [HasStdExtP, IsRV32]
+def SDT_RISCVPLI : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
+def riscv_pli : RVSDNode<"PLI", SDT_RISCVPLI>;
let Predicates = [HasStdExtP, IsRV64] in {
// Basic arithmetic patterns for v4i16 (16-bit elements in 64-bit GPR)
- def: Pat<(v4i16 (add v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (sub v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSUB_H") GPR:$rs1, GPR:$rs2)>;
-
+ def: Pat<(v4i16 (add (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (sub (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSUB_H") GPR:$rs1, GPR:$rs2)>;
+
// Saturating add/sub patterns for v4i16
- def: Pat<(v4i16 (saddsat v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (uaddsat v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSADDU_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (ssubsat v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSSUB_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (usubsat v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PSSUBU_H") GPR:$rs1, GPR:$rs2)>;
-
+ def: Pat<(v4i16 (saddsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (uaddsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSADDU_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (ssubsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSSUB_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (usubsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSSUBU_H") GPR:$rs1, GPR:$rs2)>;
+
// Averaging patterns for v4i16
- def: Pat<(v4i16 (avgfloors v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PAADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (avgflooru v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PAADDU_H") GPR:$rs1, GPR:$rs2)>;
-
- // Averaging subtraction patterns for v4i16
- // PASUB_H: signed (a - b) >> 1
- def: Pat<(v4i16 (sra (sub v4i16:$rs1, v4i16:$rs2), (v4i16 (build_vector (XLenVT 1))))),
- (!cast<Instruction>("PASUB_H") GPR:$rs1, GPR:$rs2)>;
- // PASUBU_H: unsigned (a - b) >> 1
- def: Pat<(v4i16 (srl (sub v4i16:$rs1, v4i16:$rs2), (v4i16 (build_vector (XLenVT 1))))),
- (!cast<Instruction>("PASUBU_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (avgfloors (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PAADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (avgflooru (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PAADDU_H") GPR:$rs1, GPR:$rs2)>;
// Absolute difference patterns for v4i16
- def: Pat<(v4i16 (abds v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PDIF_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (abdu v4i16:$rs1, v4i16:$rs2)), (!cast<Instruction>("PDIFU_H") GPR:$rs1, GPR:$rs2)>;
-
- // Basic arithmetic patterns for v8i8 (8-bit elements in 64-bit GPR)
- def: Pat<(v8i8 (add v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (sub v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSUB_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (abds (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PDIF_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i16 (abdu (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PDIFU_H") GPR:$rs1, GPR:$rs2)>;
+ // Basic arithmetic patterns for v8i8 (8-bit elements in 64-bit GPR)
+ def: Pat<(v8i8 (add (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (sub (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSUB_B") GPR:$rs1, GPR:$rs2)>;
+
// Saturating add/sub patterns for v8i8
- def: Pat<(v8i8 (saddsat v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (uaddsat v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSADDU_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (ssubsat v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSSUB_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (usubsat v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PSSUBU_B") GPR:$rs1, GPR:$rs2)>;
-
+ def: Pat<(v8i8 (saddsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (uaddsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSADDU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (ssubsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSSUB_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (usubsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSSUBU_B") GPR:$rs1, GPR:$rs2)>;
+
// Averaging patterns for v8i8
- def: Pat<(v8i8 (avgfloors v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PAADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (avgflooru v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PAADDU_B") GPR:$rs1, GPR:$rs2)>;
-
- // Averaging subtraction patterns for v8i8
- // PASUB_B: signed (a - b) >> 1
- def: Pat<(v8i8 (sra (sub v8i8:$rs1, v8i8:$rs2), (v8i8 (build_vector (XLenVT 1))))),
- (!cast<Instruction>("PASUB_B") GPR:$rs1, GPR:$rs2)>;
- // PASUBU_B: unsigned (a - b) >> 1
- def: Pat<(v8i8 (srl (sub v8i8:$rs1, v8i8:$rs2), (v8i8 (build_vector (XLenVT 1))))),
- (!cast<Instruction>("PASUBU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (avgfloors (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PAADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (avgflooru (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PAADDU_B") GPR:$rs1, GPR:$rs2)>;
// Absolute difference patterns for v8i8
- def: Pat<(v8i8 (abds v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PDIF_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (abdu v8i8:$rs1, v8i8:$rs2)), (!cast<Instruction>("PDIFU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (abds (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PDIF_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v8i8 (abdu (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PDIFU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (riscv_pli simm10:$imm10)), (PLI_W simm10:$imm10)>;
+ def: Pat<(v4i16 (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
+ def: Pat<(v8i8 (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
+
// Load/Store patterns for v4i16 and v8i8 (use regular GPR load/store since they're in GPRs)
def : StPat<store, SD, GPR, v4i16>;
def : LdPat<load, LD, v4i16>;
@@ -1520,62 +1510,48 @@ let Predicates = [HasStdExtP, IsRV64] in {
def : LdPat<load, LD, v2i32>;
} // Predicates = [HasStdExtP, IsRV64]
-let Predicates = [HasStdExtP, IsRV32] in {
+let Predicates = [HasStdExtP] in {
// Basic arithmetic patterns for v2i16 (16-bit elements in 32-bit GPR)
- def: Pat<(v2i16 (add v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (sub v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSUB_H") GPR:$rs1, GPR:$rs2)>;
-
+ def: Pat<(v2i16 (add (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (sub (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSUB_H") GPR:$rs1, GPR:$rs2)>;
+
// Saturating add/sub patterns for v2i16
- def: Pat<(v2i16 (saddsat v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (uaddsat v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSADDU_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (ssubsat v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSSUB_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (usubsat v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PSSUBU_H") GPR:$rs1, GPR:$rs2)>;
-
+ def: Pat<(v2i16 (saddsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (uaddsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSADDU_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (ssubsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSSUB_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (usubsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSSUBU_H") GPR:$rs1, GPR:$rs2)>;
+
// Averaging patterns for v2i16
- def: Pat<(v2i16 (avgfloors v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PAADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (avgflooru v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PAADDU_H") GPR:$rs1, GPR:$rs2)>;
-
- // Averaging subtraction patterns for v2i16
- // PASUB_H: signed (a - b) >> 1
- def: Pat<(v2i16 (sra (sub v2i16:$rs1, v2i16:$rs2), (v2i16 (build_vector (XLenVT 1))))),
- (!cast<Instruction>("PASUB_H") GPR:$rs1, GPR:$rs2)>;
- // PASUBU_H: unsigned (a - b) >> 1
- def: Pat<(v2i16 (srl (sub v2i16:$rs1, v2i16:$rs2), (v2i16 (build_vector (XLenVT 1))))),
- (!cast<Instruction>("PASUBU_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (avgfloors (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PAADD_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (avgflooru (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PAADDU_H") GPR:$rs1, GPR:$rs2)>;
// Absolute difference patterns for v2i16
- def: Pat<(v2i16 (abds v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PDIF_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (abdu v2i16:$rs1, v2i16:$rs2)), (!cast<Instruction>("PDIFU_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (abds (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PDIF_H") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (abdu (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PDIFU_H") GPR:$rs1, GPR:$rs2)>;
// Basic arithmetic patterns for v4i8 (8-bit elements in 32-bit GPR)
- def: Pat<(v4i8 (add v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (sub v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSUB_B") GPR:$rs1, GPR:$rs2)>;
-
+ def: Pat<(v4i8 (add (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (sub (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSUB_B") GPR:$rs1, GPR:$rs2)>;
+
// Saturating add/sub patterns for v4i8
- def: Pat<(v4i8 (saddsat v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (uaddsat v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSADDU_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (ssubsat v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSSUB_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (usubsat v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PSSUBU_B") GPR:$rs1, GPR:$rs2)>;
-
+ def: Pat<(v4i8 (saddsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (uaddsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSADDU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (ssubsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSSUB_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (usubsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSSUBU_B") GPR:$rs1, GPR:$rs2)>;
+
// Averaging patterns for v4i8
- def: Pat<(v4i8 (avgfloors v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PAADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (avgflooru v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PAADDU_B") GPR:$rs1, GPR:$rs2)>;
-
- // Averaging subtraction patterns for v4i8
- // PASUB_B: signed (a - b) >> 1
- def: Pat<(v4i8 (sra (sub v4i8:$rs1, v4i8:$rs2), (v4i8 (build_vector (XLenVT 1))))),
- (!cast<Instruction>("PASUB_B") GPR:$rs1, GPR:$rs2)>;
- // PASUBU_B: unsigned (a - b) >> 1
- def: Pat<(v4i8 (srl (sub v4i8:$rs1, v4i8:$rs2), (v4i8 (build_vector (XLenVT 1))))),
- (!cast<Instruction>("PASUBU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (avgfloors (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PAADD_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (avgflooru (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PAADDU_B") GPR:$rs1, GPR:$rs2)>;
// Absolute difference patterns for v4i8
- def: Pat<(v4i8 (abds v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PDIF_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (abdu v4i8:$rs1, v4i8:$rs2)), (!cast<Instruction>("PDIFU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (abds (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PDIF_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v4i8 (abdu (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PDIFU_B") GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i16 (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
+ def: Pat<(v4i8 (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
// Load/Store patterns for v2i16 and v4i8 (use regular GPR load/store since they're in GPRs)
def : StPat<store, SW, GPR, v2i16>;
def : LdPat<load, LW, v2i16>;
def : StPat<store, SW, GPR, v4i8>;
def : LdPat<load, LW, v4i8>;
-} // Predicates = [HasStdExtP, IsRV32]
+} // Predicates = [HasStdExtP]
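
For reference, a minimal IR sketch (not part of the patch; the function name is illustrative) of the shape these patterns are meant to select, mirroring the existing rvp-ext tests. A <2 x i16> saturating add loaded from and stored back through GPR-sized memory should come out as a single psadd.h:

define void @example_psadd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
  %a = load <2 x i16>, ptr %a_ptr
  %b = load <2 x i16>, ptr %b_ptr
  %res = call <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16> %a, <2 x i16> %b)
  store <2 x i16> %res, ptr %ret_ptr
  ret void
}
declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>)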
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index e669175a3d8e1..a07d441b448d2 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -1635,7 +1635,8 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
// TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
// For now, skip all fixed vector cost analysis when P extension is available
// to avoid crashes in getMinRVVVectorSizeInBits()
- if (ST->hasStdExtP() && (isa<FixedVectorType>(Dst) || isa<FixedVectorType>(Src))) {
+ if (ST->hasStdExtP() &&
+ (isa<FixedVectorType>(Dst) || isa<FixedVectorType>(Src))) {
return 1; // Treat as single instruction cost for now
}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 8a4ab1d545f41..c38f103d86bd6 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -349,9 +349,12 @@ define void @test_pasub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-NEXT: ret
%a = load <2 x i16>, ptr %a_ptr
%b = load <2 x i16>, ptr %b_ptr
- %sub = sub <2 x i16> %a, %b
- %res = ashr <2 x i16> %sub, <i16 1, i16 1>
- store <2 x i16> %res, ptr %ret_ptr
+ %a_ext = sext <2 x i16> %a to <2 x i32>
+ %b_ext = sext <2 x i16> %b to <2 x i32>
+ %sub = sub <2 x i32> %a_ext, %b_ext
+ %res = ashr <2 x i32> %sub, <i32 1, i32 1>
+ %res_trunc = trunc <2 x i32> %res to <2 x i16>
+ store <2 x i16> %res_trunc, ptr %ret_ptr
ret void
}
@@ -367,9 +370,12 @@ define void @test_pasubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-NEXT: ret
%a = load <2 x i16>, ptr %a_ptr
%b = load <2 x i16>, ptr %b_ptr
- %sub = sub <2 x i16> %a, %b
- %res = lshr <2 x i16> %sub, <i16 1, i16 1>
- store <2 x i16> %res, ptr %ret_ptr
+ %a_ext = zext <2 x i16> %a to <2 x i32>
+ %b_ext = zext <2 x i16> %b to <2 x i32>
+ %sub = sub <2 x i32> %a_ext, %b_ext
+ %res = lshr <2 x i32> %sub, <i32 1, i32 1>
+ %res_trunc = trunc <2 x i32> %res to <2 x i16>
+ store <2 x i16> %res_trunc, ptr %ret_ptr
ret void
}
@@ -384,9 +390,12 @@ define void @test_pasub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-NEXT: ret
%a = load <4 x i8>, ptr %a_ptr
%b = load <4 x i8>, ptr %b_ptr
- %sub = sub <4 x i8> %a, %b
- %res = ashr <4 x i8> %sub, <i8 1, i8 1, i8 1, i8 1>
- store <4 x i8> %res, ptr %ret_ptr
+ %a_ext = sext <4 x i8> %a to <4 x i16>
+ %b_ext = sext <4 x i8> %b to <4 x i16>
+ %sub = sub <4 x i16> %a_ext, %b_ext
+ %res = ashr <4 x i16> %sub, <i16 1, i16 1, i16 1, i16 1>
+ %res_trunc = trunc <4 x i16> %res to <4 x i8>
+ store <4 x i8> %res_trunc, ptr %ret_ptr
ret void
}
@@ -401,8 +410,35 @@ define void @test_pasubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-NEXT: ret
%a = load <4 x i8>, ptr %a_ptr
%b = load <4 x i8>, ptr %b_ptr
- %sub = sub <4 x i8> %a, %b
- %res = lshr <4 x i8> %sub, <i8 1, i8 1, i8 1, i8 1>
+ %a_ext = zext <4 x i8> %a to <4 x i16>
+ %b_ext = zext <4 x i8> %b to <4 x i16>
+ %sub = sub <4 x i16> %a_ext, %b_ext
+ %res = lshr <4 x i16> %sub, <i16 1, i16 1, i16 1, i16 1>
+ %res_trunc = trunc <4 x i16> %res to <4 x i8>
+ store <4 x i8> %res_trunc, ptr %ret_ptr
+ ret void
+}
+
+; Test PLI (pack load immediate) for v2i16
+define void @test_pli_h(ptr %ret_ptr) {
+; CHECK-LABEL: test_pli_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.h a1, 42
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %res = add <2 x i16> <i16 42, i16 42>, <i16 0, i16 0>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test PLI for v4i8 with unsigned immediate
+define void @test_pli_b(ptr %ret_ptr) {
+; CHECK-LABEL: test_pli_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.b a1, 32
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %res = add <4 x i8> <i8 32, i8 32, i8 32, i8 32>, <i8 0, i8 0, i8 0, i8 0>
store <4 x i8> %res, ptr %ret_ptr
ret void
}
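
A note on why the pasub/pasubu checks above moved from the narrow sub-then-shift form to the widened sext/zext form: the pasub instructions are matched from the full-precision difference (as the updated tests and the later TRUNCATE combine show), and that form differs from ashr(sub(a, b), 1) whenever the narrow subtraction wraps. A quick worked example with i8 elements, values treated as signed:

  a = 127, b = -128
  widened form:  (127 - (-128)) >> 1 = 255 >> 1 = 127
  narrow form:   sub i8 127, -128 wraps to -1, and ashr(-1, 1) = -1

so only the widened IR matches the semantics the pasub patterns are meant to capture.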
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index d4918e4e0aa62..d4452b7ccbc65 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -353,9 +353,12 @@ define void @test_pasub_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %a_ptr
%b = load <4 x i16>, ptr %b_ptr
- %sub = sub <4 x i16> %a, %b
- %res = ashr <4 x i16> %sub, <i16 1, i16 1, i16 1, i16 1>
- store <4 x i16> %res, ptr %ret_ptr
+ %a_ext = sext <4 x i16> %a to <4 x i32>
+ %b_ext = sext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a_ext, %b_ext
+ %res = ashr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
+ %res_trunc = trunc <4 x i32> %res to <4 x i16>
+ store <4 x i16> %res_trunc, ptr %ret_ptr
ret void
}
@@ -371,9 +374,12 @@ define void @test_pasubu_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-NEXT: ret
%a = load <4 x i16>, ptr %a_ptr
%b = load <4 x i16>, ptr %b_ptr
- %sub = sub <4 x i16> %a, %b
- %res = lshr <4 x i16> %sub, <i16 1, i16 1, i16 1, i16 1>
- store <4 x i16> %res, ptr %ret_ptr
+ %a_ext = zext <4 x i16> %a to <4 x i32>
+ %b_ext = zext <4 x i16> %b to <4 x i32>
+ %sub = sub <4 x i32> %a_ext, %b_ext
+ %res = lshr <4 x i32> %sub, <i32 1, i32 1, i32 1, i32 1>
+ %res_trunc = trunc <4 x i32> %res to <4 x i16>
+ store <4 x i16> %res_trunc, ptr %ret_ptr
ret void
}
@@ -388,9 +394,12 @@ define void @test_pasub_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-NEXT: ret
%a = load <8 x i8>, ptr %a_ptr
%b = load <8 x i8>, ptr %b_ptr
- %sub = sub <8 x i8> %a, %b
- %res = ashr <8 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
- store <8 x i8> %res, ptr %ret_ptr
+ %a_ext = sext <8 x i8> %a to <8 x i16>
+ %b_ext = sext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a_ext, %b_ext
+ %res = ashr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %res_trunc = trunc <8 x i16> %res to <8 x i8>
+ store <8 x i8> %res_trunc, ptr %ret_ptr
ret void
}
@@ -405,12 +414,51 @@ define void @test_pasubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; CHECK-NEXT: ret
%a = load <8 x i8>, ptr %a_ptr
%b = load <8 x i8>, ptr %b_ptr
- %sub = sub <8 x i8> %a, %b
- %res = lshr <8 x i8> %sub, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
+ %a_ext = zext <8 x i8> %a to <8 x i16>
+ %b_ext = zext <8 x i8> %b to <8 x i16>
+ %sub = sub <8 x i16> %a_ext, %b_ext
+ %res = lshr <8 x i16> %sub, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
+ %res_trunc = trunc <8 x i16> %res to <8 x i8>
+ store <8 x i8> %res_trunc, ptr %ret_ptr
+ ret void
+}
+
+; Test PLI (pack load immediate) for v4i16
+define void @test_pli_h(ptr %ret_ptr) {
+; CHECK-LABEL: test_pli_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.h a1, 100
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %res = add <4 x i16> <i16 100, i16 100, i16 100, i16 100>, <i16 0, i16 0, i16 0, i16 0>
+ store <4 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
+; Test PLI for v8i8 with unsigned immediate
+define void @test_pli_b(ptr %ret_ptr) {
+; CHECK-LABEL: test_pli_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.b a1, 64
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %res = add <8 x i8> <i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64, i8 64>, <i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>
store <8 x i8> %res, ptr %ret_ptr
ret void
}
+; Test PLI for v2i32 with signed immediate
+define void @test_pli_w(ptr %ret_ptr) {
+; CHECK-LABEL: test_pli_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.w a1, -256
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %res = add <2 x i32> <i32 -256, i32 -256>, <i32 0, i32 0>
+ store <2 x i32> %res, ptr %ret_ptr
+ ret void
+}
+
; Intrinsic declarations
declare <4 x i16> @llvm.sadd.sat.v4i16(<4 x i16>, <4 x i16>)
declare <4 x i16> @llvm.uadd.sat.v4i16(<4 x i16>, <4 x i16>)
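
One more illustrative case (not from the patch; the name and the expectation are assumptions based on the simm10/simm8_unsigned operands used in the patterns above): pli.h and pli.w take a 10-bit signed immediate and pli.b an 8-bit one, so a splat whose value falls outside that range, such as 512, should not be able to use pli.h and has to be materialized another way:

define void @example_splat_out_of_range(ptr %ret_ptr) {
  store <4 x i16> <i16 512, i16 512, i16 512, i16 512>, ptr %ret_ptr
  ret void
}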
>From 34dc464b646e0e196431212930e6d9c708387f98 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Wed, 15 Oct 2025 02:34:20 -0700
Subject: [PATCH 03/13] fixup! add a switch for codegen
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 14 ++++++++++----
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 2 +-
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 2 +-
3 files changed, 12 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 33c0e6bc66ad9..9eb98766e4846 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,6 +87,12 @@ static cl::opt<bool>
"be combined with a shift"),
cl::init(true));
+static cl::opt<bool> EnablePExtCodeGen(
+ DEBUG_TYPE "-enable-p-ext-codegen", cl::Hidden,
+    cl::desc("Turn on P Extension codegen (this is a temporary switch where "
+             "only partial codegen is currently supported)."),
+ cl::init(false));
+
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -280,7 +286,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
// fixed vector is stored in GPRs for P extension packed operations
- if (Subtarget.hasStdExtP()) {
+ if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
if (Subtarget.is64Bit()) {
@@ -490,7 +496,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FCANONICALIZE};
- if (Subtarget.hasStdExtP()) {
+ if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
setTargetDAGCombine(ISD::TRUNCATE);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
@@ -4349,7 +4355,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
// Handle P extension packed vector BUILD_VECTOR with PLI for splat constants
- if (Subtarget.hasStdExtP()) {
+ if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
bool IsPExtVector =
(VT == MVT::v2i16 || VT == MVT::v4i8) ||
(Subtarget.is64Bit() &&
@@ -16151,7 +16157,7 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (Subtarget.hasStdExtP() && VT.isFixedLengthVector())
+ if (Subtarget.hasStdExtP() && VT.isFixedLengthVector() && EnablePExtCodeGen)
return combinePExtTruncate(N, DAG, Subtarget);
// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index c38f103d86bd6..2cb93f1faab91 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -riscv-lower-enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
; Test basic add/sub operations for v2i16
define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index d4452b7ccbc65..67cfb0e2123a4 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -riscv-lower-enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
; Test basic add/sub operations for v4i16
define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
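
For anyone trying this out locally, the temporary switch added here is a plain llc option, so a build can be exercised the same way the updated RUN lines do (the input file name below is just a placeholder):

  llc -mtriple=riscv64 -mattr=+experimental-p -riscv-lower-enable-p-ext-codegen -verify-machineinstrs example.ll -o -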
>From 47c5eb436007750bf7563b669c88645840424ecd Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Sun, 19 Oct 2025 23:33:56 -0700
Subject: [PATCH 04/13] fixup! handle rv32 legalization
---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 8 ++
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 135 ++++++++++++++------
llvm/lib/Target/RISCV/RISCVISelLowering.h | 3 +
llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 29 +++++
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 1 +
5 files changed, 138 insertions(+), 38 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 437022f5cde9f..4c1a9929d2574 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2471,6 +2471,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
CurDAG->RemoveDeadNode(Node);
return;
}
+ if (Subtarget->hasStdExtP()) {
+ if (((VT == MVT::v4i16 || VT == MVT::v8i8) && SrcVT == MVT::i64) ||
+ ((SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) && VT == MVT::i64)) {
+ ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
+ CurDAG->RemoveDeadNode(Node);
+ }
+ return;
+ }
break;
}
case ISD::INSERT_SUBVECTOR:
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 9eb98766e4846..7a5b6bb2b90d0 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -287,12 +287,13 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
// fixed vector is stored in GPRs for P extension packed operations
if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
- addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
- addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
if (Subtarget.is64Bit()) {
addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
addRegisterClass(MVT::v8i8, &RISCV::GPRRegClass);
+ } else {
+ addRegisterClass(MVT::v2i16, &RISCV::GPRRegClass);
+ addRegisterClass(MVT::v4i8, &RISCV::GPRRegClass);
}
}
@@ -500,26 +501,31 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTargetDAGCombine(ISD::TRUNCATE);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
- // load/store are already handled by pattern matching
- SmallVector<MVT, 2> VTs = {MVT::v2i16, MVT::v4i8};
+ SmallVector<MVT, 2> VTs;
if (Subtarget.is64Bit()) {
VTs.append({MVT::v2i32, MVT::v4i16, MVT::v8i8});
setTruncStoreAction(MVT::v2i64, MVT::v2i32, Expand);
setTruncStoreAction(MVT::v4i32, MVT::v4i16, Expand);
setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
+ setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
+ setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
+ setOperationAction(ISD::LOAD, MVT::v2i16, Custom);
+ setOperationAction(ISD::LOAD, MVT::v4i8, Custom);
+ setOperationAction(ISD::STORE, MVT::v2i16, Custom);
+ setOperationAction(ISD::STORE, MVT::v4i8, Custom);
+ } else {
+ VTs.append({MVT::v2i16, MVT::v4i8});
}
- for (auto VT : VTs) {
- setOperationAction(ISD::BUILD_VECTOR, VT, Custom);
- setOperationAction(ISD::UADDSAT, VT, Legal);
- setOperationAction(ISD::SADDSAT, VT, Legal);
- setOperationAction(ISD::USUBSAT, VT, Legal);
- setOperationAction(ISD::SSUBSAT, VT, Legal);
- setOperationAction(ISD::SSHLSAT, VT, Legal);
- setOperationAction(ISD::USHLSAT, VT, Legal);
- setOperationAction(ISD::BITCAST, VT, Custom);
- setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, VT, Legal);
- setOperationAction({ISD::ABDS, ISD::ABDU}, VT, Legal);
- }
+ setOperationAction(ISD::UADDSAT, VTs, Legal);
+ setOperationAction(ISD::SADDSAT, VTs, Legal);
+ setOperationAction(ISD::USUBSAT, VTs, Legal);
+ setOperationAction(ISD::SSUBSAT, VTs, Legal);
+ setOperationAction(ISD::SSHLSAT, VTs, Legal);
+ setOperationAction(ISD::USHLSAT, VTs, Legal);
+ setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, VTs, Legal);
+ setOperationAction({ISD::ABDS, ISD::ABDU}, VTs, Legal);
+ setOperationAction(ISD::BUILD_VECTOR, VTs, Custom);
+ setOperationAction(ISD::BITCAST, VTs, Custom);
}
if (Subtarget.hasStdExtZfbfmin()) {
@@ -1739,6 +1745,15 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
MaxLoadsPerMemcmp = Subtarget.getMaxLoadsPerMemcmp(/*OptSize=*/false);
}
+TargetLoweringBase::LegalizeTypeAction
+RISCVTargetLowering::getPreferredVectorAction(MVT VT) const {
+ if (Subtarget.hasStdExtP() && Subtarget.is64Bit())
+ if (VT == MVT::v2i16 || VT == MVT::v4i8)
+ return TypeWidenVector;
+
+ return TargetLoweringBase::getPreferredVectorAction(VT);
+}
+
EVT RISCVTargetLowering::getSetCCResultType(const DataLayout &DL,
LLVMContext &Context,
EVT VT) const {
@@ -7533,6 +7548,19 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
+ if (Subtarget.hasStdExtP()) {
+ bool Is32BitCast =
+ (VT == MVT::i32 && (Op0VT == MVT::v4i8 || Op0VT == MVT::v2i16)) ||
+ (Op0VT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
+ bool Is64BitCast =
+ (VT == MVT::i64 && (Op0VT == MVT::v8i8 || Op0VT == MVT::v4i16 ||
+ Op0VT == MVT::v2i32)) ||
+ (Op0VT == MVT::i64 &&
+ (VT == MVT::v8i8 || VT == MVT::v4i16 || VT == MVT::v2i32));
+ if (Is32BitCast || Is64BitCast)
+ return Op;
+ }
+
// Consider other scalar<->scalar casts as legal if the types are legal.
// Otherwise expand them.
if (!VT.isVector() && !Op0VT.isVector()) {
@@ -8205,6 +8233,17 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
auto *Store = cast<StoreSDNode>(Op);
SDValue StoredVal = Store->getValue();
EVT VT = StoredVal.getValueType();
+ if (Subtarget.hasStdExtP()) {
+ if (VT == MVT::v2i16 || VT == MVT::v4i8) {
+        SDLoc DL(Op);
+ SDValue Cast = DAG.getBitcast(MVT::i32, StoredVal);
+ SDValue NewStore =
+ DAG.getStore(Store->getChain(), DL, Cast, Store->getBasePtr(),
+ Store->getPointerInfo(), Store->getBaseAlign(),
+ Store->getMemOperand()->getFlags());
+ return NewStore;
+ }
+ }
if (VT == MVT::f64) {
assert(Subtarget.hasStdExtZdinx() && !Subtarget.hasStdExtZilsd() &&
!Subtarget.is64Bit() && "Unexpected custom legalisation");
@@ -14632,6 +14671,19 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
+ if (Subtarget.hasStdExtP() && Subtarget.is64Bit()) {
+ SDLoc DL(N);
+ SDValue ExtLoad =
+ DAG.getExtLoad(ISD::SEXTLOAD, DL, MVT::i64, Ld->getChain(),
+ Ld->getBasePtr(), MVT::i32, Ld->getMemOperand());
+ if (N->getValueType(0) == MVT::v2i16)
+ Results.push_back(DAG.getBitcast(MVT::v4i16, ExtLoad));
+ else if (N->getValueType(0) == MVT::v4i8)
+ Results.push_back(DAG.getBitcast(MVT::v8i8, ExtLoad));
+ Results.push_back(ExtLoad.getValue(1));
+ return;
+ }
+
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
@@ -14960,6 +15012,24 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
Results.push_back(DAG.getNode(ISD::TRUNCATE, DL, VT, NewRes));
break;
}
+ case RISCVISD::PASUB:
+ case RISCVISD::PASUBU: {
+ MVT VT = N->getSimpleValueType(0);
+ SDValue Op0 = N->getOperand(0);
+ SDValue Op1 = N->getOperand(1);
+ assert(VT == MVT::v2i16 || VT == MVT::v4i8);
+ MVT NewVT = MVT::v4i16;
+ if (VT == MVT::v4i8)
+ NewVT = MVT::v8i8;
+ Op0 = DAG.getBitcast(MVT::i32, Op0);
+ Op0 = DAG.getSExtOrTrunc(Op0, DL, MVT::i64);
+ Op0 = DAG.getBitcast(NewVT, Op0);
+ Op1 = DAG.getBitcast(MVT::i32, Op1);
+ Op1 = DAG.getSExtOrTrunc(Op1, DL, MVT::i64);
+ Op1 = DAG.getBitcast(NewVT, Op1);
+ Results.push_back(DAG.getNode(N->getOpcode(), DL, NewVT, {Op0, Op1}));
+ return;
+ }
case ISD::EXTRACT_VECTOR_ELT: {
// Custom-legalize an EXTRACT_VECTOR_ELT where XLEN<SEW, as the SEW element
// type is illegal (currently only vXi64 RV32).
@@ -16125,31 +16195,20 @@ static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG,
// Determine the instruction based on type and signedness
unsigned Opc;
MVT VecVT = VT.getSimpleVT();
- if (VecVT == MVT::v4i16 && IsSignExt)
- Opc = RISCV::PASUB_H;
- else if (VecVT == MVT::v4i16 && IsZeroExt)
- Opc = RISCV::PASUBU_H;
- else if (VecVT == MVT::v2i16 && IsSignExt)
- Opc = RISCV::PASUB_H;
- else if (VecVT == MVT::v2i16 && IsZeroExt)
- Opc = RISCV::PASUBU_H;
- else if (VecVT == MVT::v8i8 && IsSignExt)
- Opc = RISCV::PASUB_B;
- else if (VecVT == MVT::v8i8 && IsZeroExt)
- Opc = RISCV::PASUBU_B;
- else if (VecVT == MVT::v4i8 && IsSignExt)
- Opc = RISCV::PASUB_B;
- else if (VecVT == MVT::v4i8 && IsZeroExt)
- Opc = RISCV::PASUBU_B;
- else if (VecVT == MVT::v2i32 && IsSignExt)
- Opc = RISCV::PASUB_W;
- else if (VecVT == MVT::v2i32 && IsZeroExt)
- Opc = RISCV::PASUBU_W;
- else
+ if (VecVT == MVT::v4i16 || VecVT == MVT::v2i16 || VecVT == MVT::v8i8 ||
+ VecVT == MVT::v4i8 || VecVT == MVT::v2i32) {
+ if (IsSignExt)
+ Opc = RISCVISD::PASUB;
+ else if (IsZeroExt)
+ Opc = RISCVISD::PASUBU;
+ else
+ return SDValue();
+ } else {
return SDValue();
+ }
// Create the machine node directly
- return SDValue(DAG.getMachineNode(Opc, SDLoc(N), VT, {A, B}), 0);
+ return DAG.getNode(Opc, SDLoc(N), VT, {A, B});
}
static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.h b/llvm/lib/Target/RISCV/RISCVISelLowering.h
index 3f81ed74c12ed..b6f2b807cf1d3 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.h
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.h
@@ -71,6 +71,9 @@ class RISCVTargetLowering : public TargetLowering {
bool preferScalarizeSplat(SDNode *N) const override;
+ /// Customize the preferred legalization strategy for certain types.
+ LegalizeTypeAction getPreferredVectorAction(MVT VT) const override;
+
bool softPromoteHalfType() const override { return true; }
/// Return the register type for a given MVT, ensuring vectors are treated
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index e6d4aa8070e0a..543193d57689f 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1458,6 +1458,12 @@ let Predicates = [HasStdExtP, IsRV32] in {
def SDT_RISCVPLI : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
def riscv_pli : RVSDNode<"PLI", SDT_RISCVPLI>;
+def SDT_RISCVPASUB
+ : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>;
+def riscv_pasub : RVSDNode<"PASUB", SDT_RISCVPASUB>;
+def SDT_RISCVPASUBU
+ : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>;
+def riscv_pasubu : RVSDNode<"PASUBU", SDT_RISCVPASUBU>;
let Predicates = [HasStdExtP, IsRV64] in {
// Basic arithmetic patterns for v4i16 (16-bit elements in 64-bit GPR)
def: Pat<(v4i16 (add (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
@@ -1499,6 +1505,19 @@ let Predicates = [HasStdExtP, IsRV64] in {
def: Pat<(v4i16 (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
def: Pat<(v8i8 (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
+ def: Pat<(v8i8 (riscv_pasub (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))),
+ (PASUB_B (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))>;
+ def: Pat<(v4i16 (riscv_pasub (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))),
+ (PASUB_H (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))>;
+ def: Pat<(v2i32 (riscv_pasub (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))),
+ (PASUB_W (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))>;
+ def: Pat<(v8i8 (riscv_pasubu (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))),
+ (PASUBU_B (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))>;
+ def: Pat<(v4i16 (riscv_pasubu (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))),
+ (PASUBU_H (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))>;
+ def: Pat<(v2i32 (riscv_pasubu (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))),
+ (PASUBU_W (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))>;
+
// Load/Store patterns for v4i16 and v8i8 (use regular GPR load/store since they're in GPRs)
def : StPat<store, SD, GPR, v4i16>;
def : LdPat<load, LD, v4i16>;
@@ -1549,6 +1568,16 @@ let Predicates = [HasStdExtP] in {
def: Pat<(v2i16 (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
def: Pat<(v4i8 (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
+
+ def: Pat<(v4i8 (riscv_pasub (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))),
+ (PASUB_B (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))>;
+ def: Pat<(v2i16 (riscv_pasub (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))),
+ (PASUB_H (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))>;
+ def: Pat<(v4i8 (riscv_pasubu (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))),
+ (PASUBU_B (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))>;
+ def: Pat<(v2i16 (riscv_pasubu (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))),
+ (PASUBU_H (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))>;
+
// Load/Store patterns for v2i16 and v4i8 (use regular GPR load/store since they're in GPRs)
def : StPat<store, SW, GPR, v2i16>;
def : LdPat<load, LW, v2i16>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 2cb93f1faab91..fce0a31d60335 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1,5 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -riscv-lower-enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -riscv-lower-enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
; Test basic add/sub operations for v2i16
define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
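
Since the rv32 test file is now also run for riscv64 (new RUN line above), it may help to spell out what happens to the narrow types there: with getPreferredVectorAction returning TypeWidenVector, IR like the sketch below (name illustrative) is legalized as a v4i16 operation, and the <2 x i16> load/store go through the custom 32-bit sextload and bitcast-store paths added in this patch.

define void @example_v2i16_on_rv64(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
  %a = load <2 x i16>, ptr %a_ptr
  %b = load <2 x i16>, ptr %b_ptr
  %res = add <2 x i16> %a, %b
  store <2 x i16> %res, ptr %ret_ptr
  ret void
}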
>From 7d6a831efae35a507955bb73aeaaebc78a83f743 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 23 Oct 2025 21:23:11 -0700
Subject: [PATCH 05/13] fixup! use vector_concats
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 9 +++------
1 file changed, 3 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 7a5b6bb2b90d0..caaf7a303110a 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -15021,12 +15021,9 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
MVT NewVT = MVT::v4i16;
if (VT == MVT::v4i8)
NewVT = MVT::v8i8;
- Op0 = DAG.getBitcast(MVT::i32, Op0);
- Op0 = DAG.getSExtOrTrunc(Op0, DL, MVT::i64);
- Op0 = DAG.getBitcast(NewVT, Op0);
- Op1 = DAG.getBitcast(MVT::i32, Op1);
- Op1 = DAG.getSExtOrTrunc(Op1, DL, MVT::i64);
- Op1 = DAG.getBitcast(NewVT, Op1);
+ SDValue Undef = DAG.getUNDEF(VT);
+ Op0 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op0, Undef});
+ Op1 = DAG.getNode(ISD::CONCAT_VECTORS, DL, NewVT, {Op1, Undef});
Results.push_back(DAG.getNode(N->getOpcode(), DL, NewVT, {Op0, Op1}));
return;
}
>From 228d38f60af354f7ce6fcaa937075a495dfd7e30 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Wed, 29 Oct 2025 01:54:35 -0700
Subject: [PATCH 06/13] fixup! hardware mode
---
llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 194 ++++++++-------------
llvm/lib/Target/RISCV/RISCVRegisterInfo.td | 10 +-
2 files changed, 84 insertions(+), 120 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 543193d57689f..8d731667976a4 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1456,131 +1456,91 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">;
} // Predicates = [HasStdExtP, IsRV32]
-def SDT_RISCVPLI : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisInt<1>]>;
+def SDT_RISCVPLI : SDTypeProfile<1, 1, [SDTCisVec<0>,
+ SDTCisInt<0>,
+ SDTCisInt<1>]>;
def riscv_pli : RVSDNode<"PLI", SDT_RISCVPLI>;
-def SDT_RISCVPASUB
- : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>;
+def SDT_RISCVPASUB : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisInt<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
def riscv_pasub : RVSDNode<"PASUB", SDT_RISCVPASUB>;
-def SDT_RISCVPASUBU
- : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>]>;
+def SDT_RISCVPASUBU : SDTypeProfile<1, 2, [SDTCisVec<0>,
+ SDTCisInt<0>,
+ SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
def riscv_pasubu : RVSDNode<"PASUBU", SDT_RISCVPASUBU>;
-let Predicates = [HasStdExtP, IsRV64] in {
- // Basic arithmetic patterns for v4i16 (16-bit elements in 64-bit GPR)
- def: Pat<(v4i16 (add (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (sub (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSUB_H") GPR:$rs1, GPR:$rs2)>;
-
- // Saturating add/sub patterns for v4i16
- def: Pat<(v4i16 (saddsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (uaddsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSADDU_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (ssubsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSSUB_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (usubsat (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PSSUBU_H") GPR:$rs1, GPR:$rs2)>;
-
- // Averaging patterns for v4i16
- def: Pat<(v4i16 (avgfloors (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PAADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (avgflooru (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PAADDU_H") GPR:$rs1, GPR:$rs2)>;
-
- // Absolute difference patterns for v4i16
- def: Pat<(v4i16 (abds (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PDIF_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i16 (abdu (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))), (!cast<Instruction>("PDIFU_H") GPR:$rs1, GPR:$rs2)>;
+
+let Predicates = [HasStdExtP] in {
+ // Basic 8-bit arithmetic patterns
+ def: Pat<(XLenVecI8VT (add GPR:$rs1, GPR:$rs2)), (PADD_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (sub GPR:$rs1, GPR:$rs2)), (PSUB_B GPR:$rs1, GPR:$rs2)>;
+
+ // Basic 16-bit arithmetic patterns
+ def: Pat<(XLenVecI16VT (add GPR:$rs1, GPR:$rs2)), (PADD_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (sub GPR:$rs1, GPR:$rs2)), (PSUB_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8-bit saturating add/sub patterns
+ def: Pat<(XLenVecI8VT (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_B GPR:$rs1, GPR:$rs2)>;
+
+ // 16-bit saturating add/sub patterns
+ def: Pat<(XLenVecI16VT (saddsat GPR:$rs1, GPR:$rs2)), (PSADD_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (uaddsat GPR:$rs1, GPR:$rs2)), (PSADDU_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (ssubsat GPR:$rs1, GPR:$rs2)), (PSSUB_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (usubsat GPR:$rs1, GPR:$rs2)), (PSSUBU_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8-bit averaging patterns
+ def: Pat<(XLenVecI8VT (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_B GPR:$rs1, GPR:$rs2)>;
+
+ // 16-bit averaging patterns
+ def: Pat<(XLenVecI16VT (avgfloors GPR:$rs1, GPR:$rs2)), (PAADD_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (avgflooru GPR:$rs1, GPR:$rs2)), (PAADDU_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_H GPR:$rs1, GPR:$rs2)>;
- // Basic arithmetic patterns for v8i8 (8-bit elements in 64-bit GPR)
- def: Pat<(v8i8 (add (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (sub (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSUB_B") GPR:$rs1, GPR:$rs2)>;
-
- // Saturating add/sub patterns for v8i8
- def: Pat<(v8i8 (saddsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (uaddsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSADDU_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (ssubsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSSUB_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (usubsat (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PSSUBU_B") GPR:$rs1, GPR:$rs2)>;
-
- // Averaging patterns for v8i8
- def: Pat<(v8i8 (avgfloors (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PAADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (avgflooru (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PAADDU_B") GPR:$rs1, GPR:$rs2)>;
+ // 8-bit absolute difference patterns
+ def: Pat<(XLenVecI8VT (abds GPR:$rs1, GPR:$rs2)), (PDIF_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (abdu GPR:$rs1, GPR:$rs2)), (PDIFU_B GPR:$rs1, GPR:$rs2)>;
- // Absolute difference patterns for v8i8
- def: Pat<(v8i8 (abds (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PDIF_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v8i8 (abdu (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))), (!cast<Instruction>("PDIFU_B") GPR:$rs1, GPR:$rs2)>;
+ // 16-bit absolute difference patterns
+ def: Pat<(XLenVecI16VT (abds GPR:$rs1, GPR:$rs2)), (PDIF_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (abdu GPR:$rs1, GPR:$rs2)), (PDIFU_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8-bit PLI SD node pattern
+ def: Pat<(XLenVecI8VT (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
+ // 16-bit PLI SD node pattern
+ def: Pat<(XLenVecI16VT (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
+
+} // Predicates = [HasStdExtP]
+
+let Predicates = [HasStdExtP, IsRV32] in {
+ // Load/Store patterns
+ def : StPat<store, SW, GPR, v4i8>;
+ def : StPat<store, SW, GPR, v2i16>;
+ def : LdPat<load, LW, v4i8>;
+ def : LdPat<load, LW, v2i16>;
+} // Predicates = [HasStdExtP, IsRV32]
+
+let Predicates = [HasStdExtP, IsRV64] in {
+ // 32-bit PLI SD node pattern
def: Pat<(v2i32 (riscv_pli simm10:$imm10)), (PLI_W simm10:$imm10)>;
- def: Pat<(v4i16 (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
- def: Pat<(v8i8 (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
-
- def: Pat<(v8i8 (riscv_pasub (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))),
- (PASUB_B (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))>;
- def: Pat<(v4i16 (riscv_pasub (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))),
- (PASUB_H (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))>;
- def: Pat<(v2i32 (riscv_pasub (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))),
- (PASUB_W (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))>;
- def: Pat<(v8i8 (riscv_pasubu (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))),
- (PASUBU_B (v8i8 GPR:$rs1), (v8i8 GPR:$rs2))>;
- def: Pat<(v4i16 (riscv_pasubu (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))),
- (PASUBU_H (v4i16 GPR:$rs1), (v4i16 GPR:$rs2))>;
- def: Pat<(v2i32 (riscv_pasubu (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))),
- (PASUBU_W (v2i32 GPR:$rs1), (v2i32 GPR:$rs2))>;
-
- // Load/Store patterns for v4i16 and v8i8 (use regular GPR load/store since they're in GPRs)
- def : StPat<store, SD, GPR, v4i16>;
- def : LdPat<load, LD, v4i16>;
+
+ // 32-bit averaging-sub patterns
+ def: Pat<(v2i32 (riscv_pasub GPR:$rs1, GPR:$rs2)), (PASUB_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (riscv_pasubu GPR:$rs1, GPR:$rs2)), (PASUBU_W GPR:$rs1, GPR:$rs2)>;
+
+ // Load/Store patterns
def : StPat<store, SD, GPR, v8i8>;
- def : LdPat<load, LD, v8i8>;
-
- // Load/Store patterns for v2i32 (32-bit elements in 64-bit GPR)
+ def : StPat<store, SD, GPR, v4i16>;
def : StPat<store, SD, GPR, v2i32>;
+ def : LdPat<load, LD, v8i8>;
+ def : LdPat<load, LD, v4i16>;
def : LdPat<load, LD, v2i32>;
} // Predicates = [HasStdExtP, IsRV64]
-
-let Predicates = [HasStdExtP] in {
- // Basic arithmetic patterns for v2i16 (16-bit elements in 32-bit GPR)
- def: Pat<(v2i16 (add (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (sub (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSUB_H") GPR:$rs1, GPR:$rs2)>;
-
- // Saturating add/sub patterns for v2i16
- def: Pat<(v2i16 (saddsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (uaddsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSADDU_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (ssubsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSSUB_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (usubsat (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PSSUBU_H") GPR:$rs1, GPR:$rs2)>;
-
- // Averaging patterns for v2i16
- def: Pat<(v2i16 (avgfloors (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PAADD_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (avgflooru (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PAADDU_H") GPR:$rs1, GPR:$rs2)>;
-
- // Absolute difference patterns for v2i16
- def: Pat<(v2i16 (abds (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PDIF_H") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v2i16 (abdu (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))), (!cast<Instruction>("PDIFU_H") GPR:$rs1, GPR:$rs2)>;
-
- // Basic arithmetic patterns for v4i8 (8-bit elements in 32-bit GPR)
- def: Pat<(v4i8 (add (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (sub (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSUB_B") GPR:$rs1, GPR:$rs2)>;
-
- // Saturating add/sub patterns for v4i8
- def: Pat<(v4i8 (saddsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (uaddsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSADDU_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (ssubsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSSUB_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (usubsat (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PSSUBU_B") GPR:$rs1, GPR:$rs2)>;
-
- // Averaging patterns for v4i8
- def: Pat<(v4i8 (avgfloors (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PAADD_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (avgflooru (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PAADDU_B") GPR:$rs1, GPR:$rs2)>;
-
- // Absolute difference patterns for v4i8
- def: Pat<(v4i8 (abds (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PDIF_B") GPR:$rs1, GPR:$rs2)>;
- def: Pat<(v4i8 (abdu (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))), (!cast<Instruction>("PDIFU_B") GPR:$rs1, GPR:$rs2)>;
-
- def: Pat<(v2i16 (riscv_pli simm10:$imm10)), (PLI_H simm10:$imm10)>;
- def: Pat<(v4i8 (riscv_pli simm8_unsigned:$imm8)), (PLI_B simm8_unsigned:$imm8)>;
-
- def: Pat<(v4i8 (riscv_pasub (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))),
- (PASUB_B (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))>;
- def: Pat<(v2i16 (riscv_pasub (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))),
- (PASUB_H (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))>;
- def: Pat<(v4i8 (riscv_pasubu (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))),
- (PASUBU_B (v4i8 GPR:$rs1), (v4i8 GPR:$rs2))>;
- def: Pat<(v2i16 (riscv_pasubu (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))),
- (PASUBU_H (v2i16 GPR:$rs1), (v2i16 GPR:$rs2))>;
-
- // Load/Store patterns for v2i16 and v4i8 (use regular GPR load/store since they're in GPRs)
- def : StPat<store, SW, GPR, v2i16>;
- def : LdPat<load, LW, v2i16>;
- def : StPat<store, SW, GPR, v4i8>;
- def : LdPat<load, LW, v4i8>;
-} // Predicates = [HasStdExtP]
diff --git a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
index fcbb93a55375b..87095e75d5dc4 100644
--- a/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVRegisterInfo.td
@@ -222,6 +222,12 @@ def XLenFVT : ValueTypeByHwMode<[RV64],
[f64]>;
def XLenPairFVT : ValueTypeByHwMode<[RV32],
[f64]>;
+
+// P extension
+def XLenVecI8VT : ValueTypeByHwMode<[RV32, RV64],
+ [v4i8, v8i8]>;
+def XLenVecI16VT : ValueTypeByHwMode<[RV32, RV64],
+ [v2i16, v4i16]>;
def XLenRI : RegInfoByHwMode<
[RV32, RV64],
[RegInfo<32,32,32>, RegInfo<64,64,64>]>;
@@ -240,9 +246,7 @@ class RISCVRegisterClass<list<ValueType> regTypes, int align, dag regList>
class GPRRegisterClass<dag regList>
: RISCVRegisterClass<[XLenVT, XLenFVT,
// P extension packed vector types:
- // RV32: v2i16, v4i8
- // RV64: v2i32, v4i16, v8i8
- v2i16, v4i8, v2i32, v4i16, v8i8], 32, regList> {
+ XLenVecI8VT, XLenVecI16VT, v2i32], 32, regList> {
let RegInfos = XLenRI;
}
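
With the ValueTypeByHwMode types above, one pattern now serves both register widths: the same generic IR selects the byte instructions whether the payload is <4 x i8> (RV32) or <8 x i8> (RV64). A sketch for the unsigned saturating add (names illustrative, mirroring the existing tests):

define void @example_psaddu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
  %a = load <4 x i8>, ptr %a_ptr
  %b = load <4 x i8>, ptr %b_ptr
  %res = call <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8> %a, <4 x i8> %b)
  store <4 x i8> %res, ptr %ret_ptr
  ret void
}
declare <4 x i8> @llvm.uadd.sat.v4i8(<4 x i8>, <4 x i8>)

On RV32 this is expected to select psaddu.b directly; the <8 x i8> equivalent plays the same role on RV64.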
>From e308c092879e57d180145523099846195c074521 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Wed, 29 Oct 2025 01:59:26 -0700
Subject: [PATCH 07/13] fixup! resolve comments
---
llvm/lib/Target/RISCV/RISCVFeatures.td | 4 +
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 2 +-
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 92 +++++++++++--------
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 7 +-
llvm/test/CodeGen/RISCV/features-info.ll | 1 +
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 74 ++++++++++++---
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 2 +-
7 files changed, 128 insertions(+), 54 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index 40c05e8602553..ee1d22aa9a2f2 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1100,6 +1100,10 @@ def FeatureStdExtP
def HasStdExtP : Predicate<"Subtarget->hasStdExtP()">,
AssemblerPredicate<(all_of FeatureStdExtP),
"'Base P' (Packed SIMD)">;
+def FeatureEnablePExtCodeGen
+ : SubtargetFeature<"enable-p-ext-codegen", "EnablePExtCodeGen",
+ "true", "Turn on P Extension codegen(This is a temporary"
+ " switch where only partial codegen is currently supported)">;
def HasStdExtZbaOrP
: Predicate<"Subtarget->hasStdExtZba() || Subtarget->hasStdExtP()">,
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 4c1a9929d2574..4ed1ce3ac9dbe 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -2471,7 +2471,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
CurDAG->RemoveDeadNode(Node);
return;
}
- if (Subtarget->hasStdExtP()) {
+ if (Subtarget->hasStdExtP() && Subtarget->enablePExtCodeGen()) {
if (((VT == MVT::v4i16 || VT == MVT::v8i8) && SrcVT == MVT::i64) ||
((SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) && VT == MVT::i64)) {
ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index caaf7a303110a..6ffaf430d54b1 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -87,12 +87,6 @@ static cl::opt<bool>
"be combined with a shift"),
cl::init(true));
-static cl::opt<bool> EnablePExtCodeGen(
- DEBUG_TYPE "-enable-p-ext-codegen", cl::Hidden,
- cl::desc("Turn on P Extension codegen(This is a temporary switch where "
- "only partial codegen is currently supported."),
- cl::init(false));
-
RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
const RISCVSubtarget &STI)
: TargetLowering(TM), Subtarget(STI) {
@@ -286,7 +280,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
// fixed vector is stored in GPRs for P extension packed operations
- if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
+ if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
if (Subtarget.is64Bit()) {
addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
@@ -497,7 +491,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FCANONICALIZE};
- if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
+ if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
@@ -511,8 +505,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
setOperationAction(ISD::LOAD, MVT::v2i16, Custom);
setOperationAction(ISD::LOAD, MVT::v4i8, Custom);
- setOperationAction(ISD::STORE, MVT::v2i16, Custom);
- setOperationAction(ISD::STORE, MVT::v4i8, Custom);
} else {
VTs.append({MVT::v2i16, MVT::v4i8});
}
@@ -526,6 +518,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::ABDS, ISD::ABDU}, VTs, Legal);
setOperationAction(ISD::BUILD_VECTOR, VTs, Custom);
setOperationAction(ISD::BITCAST, VTs, Custom);
+ setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom);
}
if (Subtarget.hasStdExtZfbfmin()) {
@@ -1747,7 +1740,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
TargetLoweringBase::LegalizeTypeAction
RISCVTargetLowering::getPreferredVectorAction(MVT VT) const {
- if (Subtarget.hasStdExtP() && Subtarget.is64Bit())
+ if (Subtarget.hasStdExtP() && Subtarget.is64Bit() &&
+ Subtarget.enablePExtCodeGen())
if (VT == MVT::v2i16 || VT == MVT::v4i8)
return TypeWidenVector;
@@ -4370,7 +4364,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
// Handle P extension packed vector BUILD_VECTOR with PLI for splat constants
- if (Subtarget.hasStdExtP() && EnablePExtCodeGen) {
+ if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
bool IsPExtVector =
(VT == MVT::v2i16 || VT == MVT::v4i8) ||
(Subtarget.is64Bit() &&
@@ -4382,15 +4376,18 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
bool IsValidImm = false;
// Check immediate range based on vector type
- if (VT == MVT::v8i8 || VT == MVT::v4i8)
- // PLI_B uses 8-bit unsigned immediate
- IsValidImm = isUInt<8>(SplatImm);
- else
+ if (VT == MVT::v8i8 || VT == MVT::v4i8) {
+        // PLI_B uses an 8-bit unsigned or signed immediate
+ IsValidImm = isUInt<8>(SplatImm) || isInt<8>(SplatImm);
+ if (isUInt<8>(SplatImm))
+ SplatImm = (int8_t)SplatImm;
+ } else {
// PLI_H and PLI_W use 10-bit signed immediate
IsValidImm = isInt<10>(SplatImm);
+ }
if (IsValidImm) {
- SDValue Imm = DAG.getConstant(SplatImm, DL, XLenVT);
+ SDValue Imm = DAG.getSignedConstant(SplatImm, DL, XLenVT);
return DAG.getNode(RISCVISD::PLI, DL, VT, Imm);
}
}
@@ -7548,7 +7545,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
- if (Subtarget.hasStdExtP()) {
+ if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
bool Is32BitCast =
(VT == MVT::i32 && (Op0VT == MVT::v4i8 || Op0VT == MVT::v2i16)) ||
(Op0VT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
@@ -8233,7 +8230,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
auto *Store = cast<StoreSDNode>(Op);
SDValue StoredVal = Store->getValue();
EVT VT = StoredVal.getValueType();
- if (Subtarget.hasStdExtP()) {
+ if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
if (VT == MVT::v2i16 || VT == MVT::v4i8) {
         SDLoc DL(Op);
SDValue Cast = DAG.getBitcast(MVT::i32, StoredVal);
@@ -10526,6 +10523,27 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
}
+ if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen() &&
+ VecVT.isFixedLengthVector()) {
+ if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
+ VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
+ return SDValue();
+ SDValue Extracted = DAG.getBitcast(XLenVT, Vec);
+ unsigned ElemWidth = EltVT.getSizeInBits();
+ if (auto *IdxC = dyn_cast<ConstantSDNode>(Idx)) {
+ unsigned Idx = IdxC->getZExtValue();
+ unsigned Shamt = Idx * ElemWidth;
+ if (Shamt > 0)
+          Extracted = DAG.getNode(ISD::SRL, DL, XLenVT, Extracted,
+                                  DAG.getConstant(Shamt, DL, XLenVT));
+ } else {
+ SDValue Shamt = DAG.getNode(ISD::MUL, DL, XLenVT, Idx,
+ DAG.getConstant(ElemWidth, DL, XLenVT));
+        Extracted = DAG.getNode(ISD::SRL, DL, XLenVT, Extracted, Shamt);
+ }
+ return DAG.getNode(ISD::TRUNCATE, DL, EltVT, Extracted);
+ }
+
// If this is a fixed vector, we need to convert it to a scalable vector.
MVT ContainerVT = VecVT;
if (VecVT.isFixedLengthVector()) {
@@ -14671,16 +14689,19 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- if (Subtarget.hasStdExtP() && Subtarget.is64Bit()) {
+ if (Subtarget.hasStdExtP() && Subtarget.is64Bit() &&
+ Subtarget.enablePExtCodeGen()) {
SDLoc DL(N);
SDValue ExtLoad =
DAG.getExtLoad(ISD::SEXTLOAD, DL, MVT::i64, Ld->getChain(),
Ld->getBasePtr(), MVT::i32, Ld->getMemOperand());
- if (N->getValueType(0) == MVT::v2i16)
+ if (N->getValueType(0) == MVT::v2i16) {
Results.push_back(DAG.getBitcast(MVT::v4i16, ExtLoad));
- else if (N->getValueType(0) == MVT::v4i8)
+ Results.push_back(ExtLoad.getValue(1));
+ } else if (N->getValueType(0) == MVT::v4i8) {
Results.push_back(DAG.getBitcast(MVT::v8i8, ExtLoad));
- Results.push_back(ExtLoad.getValue(1));
+ Results.push_back(ExtLoad.getValue(1));
+ }
return;
}
@@ -16141,10 +16162,12 @@ static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG,
const RISCVSubtarget &Subtarget) {
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (!Subtarget.hasStdExtP() || !VT.isFixedLengthVector())
+ if (N0.getOpcode() != ISD::SRL)
return SDValue();
- if (N0.getOpcode() != ISD::SRL)
+ MVT VecVT = VT.getSimpleVT();
+ if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
+ VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
return SDValue();
// Check if shift amount is 1
@@ -16191,18 +16214,12 @@ static SDValue combinePExtTruncate(SDNode *N, SelectionDAG &DAG,
// Determine the instruction based on type and signedness
unsigned Opc;
- MVT VecVT = VT.getSimpleVT();
- if (VecVT == MVT::v4i16 || VecVT == MVT::v2i16 || VecVT == MVT::v8i8 ||
- VecVT == MVT::v4i8 || VecVT == MVT::v2i32) {
- if (IsSignExt)
- Opc = RISCVISD::PASUB;
- else if (IsZeroExt)
- Opc = RISCVISD::PASUBU;
- else
- return SDValue();
- } else {
+ if (IsSignExt)
+ Opc = RISCVISD::PASUB;
+ else if (IsZeroExt)
+ Opc = RISCVISD::PASUBU;
+ else
return SDValue();
- }
// Create the machine node directly
return DAG.getNode(Opc, SDLoc(N), VT, {A, B});
@@ -16213,7 +16230,8 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (Subtarget.hasStdExtP() && VT.isFixedLengthVector() && EnablePExtCodeGen)
+ if (Subtarget.hasStdExtP() && VT.isFixedLengthVector() &&
+ Subtarget.enablePExtCodeGen())
return combinePExtTruncate(N, DAG, Subtarget);
// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
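
A quick sketch (not part of the patch; the name is illustrative) of the EXTRACT_VECTOR_ELT lowering added above: the packed vector already lives in a GPR, so extracting element i is just a right shift of that register by i * element-width followed by a truncate.

define i16 @example_extract_lane1(<2 x i16> %v) {
  %e = extractelement <2 x i16> %v, i32 1
  ret i16 %e
}

For the constant index 1 this should become a 16-bit logical shift right of the holding register plus the truncation back to i16.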
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index a07d441b448d2..b72ebb39ee43a 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -972,7 +972,7 @@ InstructionCost RISCVTTIImpl::getScalarizationOverhead(
// TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
// For now, skip all fixed vector cost analysis when P extension is available
// to avoid crashes in getMinRVVVectorSizeInBits()
- if (ST->hasStdExtP() && isa<FixedVectorType>(Ty)) {
+ if (ST->hasStdExtP() && ST->enablePExtCodeGen() && isa<FixedVectorType>(Ty)) {
return 1; // Treat as single instruction cost for now
}
@@ -1635,7 +1635,7 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
// TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
// For now, skip all fixed vector cost analysis when P extension is available
// to avoid crashes in getMinRVVVectorSizeInBits()
- if (ST->hasStdExtP() &&
+ if (ST->hasStdExtP() && ST->enablePExtCodeGen() &&
(isa<FixedVectorType>(Dst) || isa<FixedVectorType>(Src))) {
return 1; // Treat as single instruction cost for now
}
@@ -2339,7 +2339,8 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
// For now, skip all fixed vector cost analysis when P extension is available
// to avoid crashes in getMinRVVVectorSizeInBits()
- if (ST->hasStdExtP() && isa<FixedVectorType>(Val)) {
+ if (ST->hasStdExtP() && ST->enablePExtCodeGen() &&
+ isa<FixedVectorType>(Val)) {
return 1; // Treat as single instruction cost for now
}
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 1a7a72d3e072b..c9b62755c18b4 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -23,6 +23,7 @@
; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler.
; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN.
; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs).
+; CHECK-NEXT: enable-p-ext-codegen - Turn on P Extension codegen(This is a temporary switch where only partial codegen is currently supported).
; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation).
; CHECK-NEXT: experimental - Experimental intrinsics.
; CHECK-NEXT: experimental-p - 'P' ('Base P' (Packed SIMD)).
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index fce0a31d60335..7ddcb4abf1a1d 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -riscv-lower-enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -riscv-lower-enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-RV64 %s
; Test basic add/sub operations for v2i16
define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
@@ -422,28 +422,78 @@ define void @test_pasubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; Test PLI (pack load immediate) for v2i16
define void @test_pli_h(ptr %ret_ptr) {
-; CHECK-LABEL: test_pli_h:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pli.h a1, 42
-; CHECK-NEXT: sw a1, 0(a0)
-; CHECK-NEXT: ret
+; CHECK-RV32-LABEL: test_pli_h:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: pli.h a1, 42
+; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: test_pli_h:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: lui a1, 672
+; CHECK-RV64-NEXT: addi a1, a1, 42
+; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: ret
%res = add <2 x i16> <i16 42, i16 42>, <i16 0, i16 0>
store <2 x i16> %res, ptr %ret_ptr
ret void
}
+define void @test_pli_h_negative(ptr %ret_ptr) {
+; CHECK-RV32-LABEL: test_pli_h_negative:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: pli.h a1, -5
+; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: test_pli_h_negative:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: lui a1, 1048512
+; CHECK-RV64-NEXT: addi a1, a1, -5
+; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: ret
+ %res = add <2 x i16> <i16 -5, i16 -5>, <i16 0, i16 0>
+ store <2 x i16> %res, ptr %ret_ptr
+ ret void
+}
+
; Test PLI for v4i8 with unsigned immediate
define void @test_pli_b(ptr %ret_ptr) {
-; CHECK-LABEL: test_pli_b:
-; CHECK: # %bb.0:
-; CHECK-NEXT: pli.b a1, 32
-; CHECK-NEXT: sw a1, 0(a0)
-; CHECK-NEXT: ret
+; CHECK-RV32-LABEL: test_pli_b:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: pli.b a1, 32
+; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: test_pli_b:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: lui a1, 131586
+; CHECK-RV64-NEXT: addi a1, a1, 32
+; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: ret
%res = add <4 x i8> <i8 32, i8 32, i8 32, i8 32>, <i8 0, i8 0, i8 0, i8 0>
store <4 x i8> %res, ptr %ret_ptr
ret void
}
+define void @test_pli_b_negative(ptr %ret_ptr) {
+; CHECK-RV32-LABEL: test_pli_b_negative:
+; CHECK-RV32: # %bb.0:
+; CHECK-RV32-NEXT: pli.b a1, -2
+; CHECK-RV32-NEXT: sw a1, 0(a0)
+; CHECK-RV32-NEXT: ret
+;
+; CHECK-RV64-LABEL: test_pli_b_negative:
+; CHECK-RV64: # %bb.0:
+; CHECK-RV64-NEXT: lui a1, 1044464
+; CHECK-RV64-NEXT: addi a1, a1, -258
+; CHECK-RV64-NEXT: sw a1, 0(a0)
+; CHECK-RV64-NEXT: ret
+ %res = add <4 x i8> <i8 -2, i8 -2, i8 -2, i8 -2>, <i8 0, i8 0, i8 0, i8 0>
+ store <4 x i8> %res, ptr %ret_ptr
+ ret void
+}
+
; Intrinsic declarations
declare <2 x i16> @llvm.sadd.sat.v2i16(<2 x i16>, <2 x i16>)
declare <2 x i16> @llvm.uadd.sat.v2i16(<2 x i16>, <2 x i16>)
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 67cfb0e2123a4..eb81ab31a107e 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -riscv-lower-enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
; Test basic add/sub operations for v4i16
define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
>From e10520a85368429bcab2044e3ea2ef5abe70dbb2 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 30 Oct 2025 03:47:35 -0700
Subject: [PATCH 08/13] fixup! teach RISCVMat about PLI
---
.../Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 29 ++++++++++
.../Target/RISCV/MCTargetDesc/RISCVMatInt.h | 2 +-
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 8 +++
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 54 ++++++-------------
4 files changed, 54 insertions(+), 39 deletions(-)
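Summary of the materializer change: constants whose 32-bit, 16-bit, or 8-bit slices all repeat can now be built with a single PLI instead of an LUI/ADDI sequence. A hypothetical example of a scalar constant that should now materialize with pli.h (the function name, the CHECK line, and the literal are illustrative and not taken from the patch; 11822129413226538 is 0x002A002A002A002A, i.e. 42 splatted across all four halfwords):

    define i64 @splat_halfwords() {
    ; hypothetical expectation on RV64 with -mattr=+experimental-p:
    ; CHECK: pli.h a0, 42
      ret i64 11822129413226538
    }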
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 26f434b528584..186304b72773b 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -79,6 +79,32 @@ static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
}
}
+ if (STI.hasFeature(RISCV::FeatureStdExtP)) {
+ // Check if the immediate is packed i8 or i10
+ int32_t Bit63To32 = Val >> 32;
+ int32_t Bit31To0 = Val & 0xFFFFFFFF;
+ int16_t Bit31To16 = Bit31To0 >> 16;
+ int16_t Bit15To0 = Bit31To0 & 0xFFFF;
+ int8_t Bit15To8 = Bit15To0 >> 8;
+ int8_t Bit7To0 = Bit15To0 & 0xFF;
+ if (Bit63To32 == Bit31To0) {
+ if (IsRV64 && isInt<10>(Bit63To32)) {
+ Res.emplace_back(RISCV::PLI_W, Bit63To32);
+ return;
+ }
+ if (Bit31To16 == Bit15To0) {
+ if (isInt<10>(Bit31To16)) {
+ Res.emplace_back(RISCV::PLI_H, Bit31To16);
+ return;
+ }
+ if (Bit15To8 == Bit7To0) {
+ Res.emplace_back(RISCV::PLI_B, Bit15To8);
+ return;
+ }
+ }
+ }
+ }
+
if (isInt<32>(Val)) {
// Depending on the active bits in the immediate Value v, the following
// instruction sequences are emitted:
@@ -562,6 +588,9 @@ OpndKind Inst::getOpndKind() const {
case RISCV::LUI:
case RISCV::QC_LI:
case RISCV::QC_E_LI:
+ case RISCV::PLI_B:
+ case RISCV::PLI_H:
+ case RISCV::PLI_W:
return RISCVMatInt::Imm;
case RISCV::ADD_UW:
return RISCVMatInt::RegX0;
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
index a82cd650f42fa..5df8edb2ee85a 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.h
@@ -21,7 +21,7 @@ namespace RISCVMatInt {
enum OpndKind {
RegImm, // ADDI/ADDIW/XORI/SLLI/SRLI/SLLI_UW/RORI/BSETI/BCLRI/TH_SRRI
- Imm, // LUI/QC_LI/QC_E_LI
+ Imm, // LUI/QC_LI/QC_E_LI/PLI_B/PLI_H/PLI_W
RegReg, // SH1ADD/SH2ADD/SH3ADD/PACK
RegX0, // ADD_UW
};
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 9b925ffdbd39e..b6e0c0779c9d5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1034,6 +1034,14 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
Imm = SignExtend64<32>(Imm);
+ if (hasAllWUsers(Node) && Subtarget->hasStdExtP() &&
+ Subtarget->enablePExtCodeGen()) {
+ // If its 4 packed 8 bit integer or 2 packed signed integer, we can simply
+ // copy lower 32 bits to higher 32 bits to make it able to rematerialize
+ // to PLI_B or PLI_H
+ Imm = (Imm << 32) | (Imm & 0xFFFFFFFF);
+ }
+
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
return;
}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 7ddcb4abf1a1d..418d17ece403b 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -422,36 +422,22 @@ define void @test_pasubu_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
; Test PLI (pack load immediate) for v2i16
define void @test_pli_h(ptr %ret_ptr) {
-; CHECK-RV32-LABEL: test_pli_h:
-; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: pli.h a1, 42
-; CHECK-RV32-NEXT: sw a1, 0(a0)
-; CHECK-RV32-NEXT: ret
-;
-; CHECK-RV64-LABEL: test_pli_h:
-; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lui a1, 672
-; CHECK-RV64-NEXT: addi a1, a1, 42
-; CHECK-RV64-NEXT: sw a1, 0(a0)
-; CHECK-RV64-NEXT: ret
+; CHECK-LABEL: test_pli_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.h a1, 42
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
%res = add <2 x i16> <i16 42, i16 42>, <i16 0, i16 0>
store <2 x i16> %res, ptr %ret_ptr
ret void
}
define void @test_pli_h_negative(ptr %ret_ptr) {
-; CHECK-RV32-LABEL: test_pli_h_negative:
-; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: pli.h a1, -5
-; CHECK-RV32-NEXT: sw a1, 0(a0)
-; CHECK-RV32-NEXT: ret
-;
-; CHECK-RV64-LABEL: test_pli_h_negative:
-; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lui a1, 1048512
-; CHECK-RV64-NEXT: addi a1, a1, -5
-; CHECK-RV64-NEXT: sw a1, 0(a0)
-; CHECK-RV64-NEXT: ret
+; CHECK-LABEL: test_pli_h_negative:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.h a1, -5
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
%res = add <2 x i16> <i16 -5, i16 -5>, <i16 0, i16 0>
store <2 x i16> %res, ptr %ret_ptr
ret void
@@ -459,18 +445,11 @@ define void @test_pli_h_negative(ptr %ret_ptr) {
; Test PLI for v4i8 with unsigned immediate
define void @test_pli_b(ptr %ret_ptr) {
-; CHECK-RV32-LABEL: test_pli_b:
-; CHECK-RV32: # %bb.0:
-; CHECK-RV32-NEXT: pli.b a1, 32
-; CHECK-RV32-NEXT: sw a1, 0(a0)
-; CHECK-RV32-NEXT: ret
-;
-; CHECK-RV64-LABEL: test_pli_b:
-; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lui a1, 131586
-; CHECK-RV64-NEXT: addi a1, a1, 32
-; CHECK-RV64-NEXT: sw a1, 0(a0)
-; CHECK-RV64-NEXT: ret
+; CHECK-LABEL: test_pli_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: pli.b a1, 32
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
%res = add <4 x i8> <i8 32, i8 32, i8 32, i8 32>, <i8 0, i8 0, i8 0, i8 0>
store <4 x i8> %res, ptr %ret_ptr
ret void
@@ -485,8 +464,7 @@ define void @test_pli_b_negative(ptr %ret_ptr) {
;
; CHECK-RV64-LABEL: test_pli_b_negative:
; CHECK-RV64: # %bb.0:
-; CHECK-RV64-NEXT: lui a1, 1044464
-; CHECK-RV64-NEXT: addi a1, a1, -258
+; CHECK-RV64-NEXT: pli.h a1, -258
; CHECK-RV64-NEXT: sw a1, 0(a0)
; CHECK-RV64-NEXT: ret
%res = add <4 x i8> <i8 -2, i8 -2, i8 -2, i8 -2>, <i8 0, i8 0, i8 0, i8 0>
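The new RV64 check line for test_pli_b_negative above is expected: the byte splat is caught by the halfword case first. A short worked reading of the selection order in the RISCVMatInt hunk:

    <i8 -2, i8 -2, i8 -2, i8 -2>    -> bit pattern 0xFEFEFEFE
    halfwords: 0xFEFE == 0xFEFE     -> candidate for pli.h
    0xFEFE as a signed 16-bit value is -258, which fits the signed 10-bit range,
    so pli.h a1, -258 is selected before the pli.b case is ever tried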
>From aa7bb56e0aebbe9e73932c37d104676f78fd82c3 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 30 Oct 2025 10:44:50 -0700
Subject: [PATCH 09/13] fixup! resolve comment
---
.../Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 6 +++---
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 18 +++++++++++++++---
2 files changed, 18 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
index 186304b72773b..cedaa8679ff1b 100644
--- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
+++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp
@@ -82,11 +82,11 @@ static void generateInstSeqImpl(int64_t Val, const MCSubtargetInfo &STI,
if (STI.hasFeature(RISCV::FeatureStdExtP)) {
// Check if the immediate is packed i8 or i10
int32_t Bit63To32 = Val >> 32;
- int32_t Bit31To0 = Val & 0xFFFFFFFF;
+ int32_t Bit31To0 = Val;
int16_t Bit31To16 = Bit31To0 >> 16;
- int16_t Bit15To0 = Bit31To0 & 0xFFFF;
+ int16_t Bit15To0 = Bit31To0;
int8_t Bit15To8 = Bit15To0 >> 8;
- int8_t Bit7To0 = Bit15To0 & 0xFF;
+ int8_t Bit7To0 = Bit15To0;
if (Bit63To32 == Bit31To0) {
if (IsRV64 && isInt<10>(Bit63To32)) {
Res.emplace_back(RISCV::PLI_W, Bit63To32);
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index b6e0c0779c9d5..c0dda886b6065 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -991,6 +991,18 @@ static unsigned getSegInstNF(unsigned Intrinsic) {
}
}
+static bool isApplicableToPLI(int Val) {
+ // Check if the immediate is packed i8 or i10
+ int16_t Bit31To16 = Val >> 16;
+ int16_t Bit15To0 = Val;
+ int8_t Bit15To8 = Bit15To0 >> 8;
+ int8_t Bit7To0 = Val;
+ if (Bit31To16 != Bit15To0)
+ return false;
+
+ return isInt<10>(Bit31To16) || Bit15To8 == Bit7To0;
+}
+
void RISCVDAGToDAGISel::Select(SDNode *Node) {
// If we have a custom node, we have already selected.
if (Node->isMachineOpcode()) {
@@ -1034,12 +1046,12 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
if (!isInt<32>(Imm) && isUInt<32>(Imm) && hasAllWUsers(Node))
Imm = SignExtend64<32>(Imm);
- if (hasAllWUsers(Node) && Subtarget->hasStdExtP() &&
- Subtarget->enablePExtCodeGen()) {
+ if (hasAllWUsers(Node) && isApplicableToPLI(Imm) &&
+ Subtarget->hasStdExtP() && Subtarget->enablePExtCodeGen()) {
// If its 4 packed 8 bit integer or 2 packed signed integer, we can simply
// copy lower 32 bits to higher 32 bits to make it able to rematerialize
// to PLI_B or PLI_H
- Imm = (Imm << 32) | (Imm & 0xFFFFFFFF);
+ Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
}
ReplaceNode(Node, selectImm(CurDAG, DL, VT, Imm, *Subtarget).getNode());
>From 9694d2cde9eb84ff605241f139a86d1b51cd7c26 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Thu, 30 Oct 2025 10:49:02 -0700
Subject: [PATCH 10/13] fixup! accidentally deleted line
---
llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 1 +
1 file changed, 1 insertion(+)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index ed03a68f3e52f..6ac04efaa3523 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1456,6 +1456,7 @@ let Predicates = [HasStdExtP, IsRV32] in {
def PMAXU_DB : RVPPairBinaryExchanged_rr<0b1111, 0b10, "pmaxu.db">;
} // Predicates = [HasStdExtP, IsRV32]
+
//===----------------------------------------------------------------------===//
// Codegen patterns
//===----------------------------------------------------------------------===//
>From 3502c4db8ab9a50f19eb6eeb0f9f6dc289991dc9 Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Mon, 3 Nov 2025 01:52:43 -0800
Subject: [PATCH 11/13] fixup! remove unused isd
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 --
1 file changed, 2 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 205846db9ff4a..84411199440a5 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -520,8 +520,6 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SADDSAT, VTs, Legal);
setOperationAction(ISD::USUBSAT, VTs, Legal);
setOperationAction(ISD::SSUBSAT, VTs, Legal);
- setOperationAction(ISD::SSHLSAT, VTs, Legal);
- setOperationAction(ISD::USHLSAT, VTs, Legal);
setOperationAction({ISD::AVGFLOORS, ISD::AVGFLOORU}, VTs, Legal);
setOperationAction({ISD::ABDS, ISD::ABDU}, VTs, Legal);
setOperationAction(ISD::BUILD_VECTOR, VTs, Custom);
>From bba3c88293a91788a52d9cce52be585958f72edb Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Mon, 3 Nov 2025 19:33:56 -0800
Subject: [PATCH 12/13] fixup! use timm, remove unused switch case
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +-
llvm/lib/Target/RISCV/RISCVInstrInfo.cpp | 4 ----
llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 4 ++--
3 files changed, 3 insertions(+), 7 deletions(-)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 84411199440a5..a2593f52cef30 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -4395,7 +4395,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
}
if (IsValidImm) {
- SDValue Imm = DAG.getSignedConstant(SplatImm, DL, XLenVT);
+ SDValue Imm = DAG.getSignedTargetConstant(SplatImm, DL, XLenVT);
return DAG.getNode(RISCVISD::PLI, DL, VT, Imm);
}
}
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
index cd01362d2c6d1..fd7f84bc15a1c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.cpp
@@ -2911,9 +2911,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
case RISCVOp::OPERAND_SIMM8_UNSIGNED:
Ok = isInt<8>(Imm);
break;
- case RISCVOp::OPERAND_SIMM10_UNSIGNED:
- Ok = isInt<10>(Imm);
- break;
case RISCVOp::OPERAND_SIMM10_LSB0000_NONZERO:
Ok = isShiftedInt<6, 4>(Imm) && (Imm != 0);
break;
@@ -2935,7 +2932,6 @@ bool RISCVInstrInfo::verifyInstruction(const MachineInstr &MI,
// clang-format off
CASE_OPERAND_SIMM(5)
CASE_OPERAND_SIMM(6)
- CASE_OPERAND_SIMM(8)
CASE_OPERAND_SIMM(10)
CASE_OPERAND_SIMM(11)
CASE_OPERAND_SIMM(12)
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 6ac04efaa3523..680a562d4ffd2 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -18,7 +18,7 @@
// Operand and SDNode transformation definitions.
//===----------------------------------------------------------------------===//
-def simm10 : RISCVSImmLeafOp<10>;
+def simm10 : RISCVSImmOp<10>, TImmLeaf<XLenVT, "return isInt<10>(Imm);">;
def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> {
let RenderMethod = "addSImm8UnsignedOperands";
@@ -26,7 +26,7 @@ def SImm8UnsignedAsmOperand : SImmAsmOperand<8, "Unsigned"> {
// A 8-bit signed immediate allowing range [-128, 255]
// but represented as [-128, 127].
-def simm8_unsigned : RISCVSImmLeafOp<8> {
+def simm8_unsigned : RISCVOp, TImmLeaf<XLenVT, "return isInt<8>(Imm);"> {
let ParserMatchClass = SImm8UnsignedAsmOperand;
let EncoderMethod = "getImmOpValue";
let DecoderMethod = "decodeSImmOperand<8>";
>From 63194c26e0dc75ba4fd54939711a7f8bddf5b83e Mon Sep 17 00:00:00 2001
From: Brandon Wu <songwu0813 at gmail.com>
Date: Mon, 3 Nov 2025 23:08:34 -0800
Subject: [PATCH 13/13] fixup! cleanup, typo, subtarget
---
.../Target/RISCV/AsmParser/RISCVAsmParser.cpp | 1 -
llvm/lib/Target/RISCV/RISCVFeatures.td | 4 ----
llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp | 10 ++++-----
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 22 ++++++++-----------
llvm/lib/Target/RISCV/RISCVSubtarget.cpp | 10 +++++++++
llvm/lib/Target/RISCV/RISCVSubtarget.h | 2 ++
.../Target/RISCV/RISCVTargetTransformInfo.cpp | 7 +++---
llvm/test/CodeGen/RISCV/features-info.ll | 1 -
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 4 ++--
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 2 +-
10 files changed, 32 insertions(+), 31 deletions(-)
diff --git a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
index 82d98ab44cb99..edde7ac487da3 100644
--- a/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
+++ b/llvm/lib/Target/RISCV/AsmParser/RISCVAsmParser.cpp
@@ -809,7 +809,6 @@ struct RISCVOperand final : public MCParsedAsmOperand {
bool isSImm5() const { return isSImm<5>(); }
bool isSImm6() const { return isSImm<6>(); }
- bool isSImm8() const { return isSImm<8>(); }
bool isSImm10() const { return isSImm<10>(); }
bool isSImm11() const { return isSImm<11>(); }
bool isSImm12() const { return isSImm<12>(); }
diff --git a/llvm/lib/Target/RISCV/RISCVFeatures.td b/llvm/lib/Target/RISCV/RISCVFeatures.td
index fd5cb17a68932..cfee6ab22d4ff 100644
--- a/llvm/lib/Target/RISCV/RISCVFeatures.td
+++ b/llvm/lib/Target/RISCV/RISCVFeatures.td
@@ -1104,10 +1104,6 @@ def FeatureStdExtP
def HasStdExtP : Predicate<"Subtarget->hasStdExtP()">,
AssemblerPredicate<(all_of FeatureStdExtP),
"'Base P' (Packed SIMD)">;
-def FeatureEnablePExtCodeGen
- : SubtargetFeature<"enable-p-ext-codegen", "EnablePExtCodeGen",
- "true", "Turn on P Extension codegen(This is a temporary"
- " switch where only partial codegen is currently supported)">;
def HasStdExtZbaOrP
: Predicate<"Subtarget->hasStdExtZba() || Subtarget->hasStdExtP()">,
diff --git a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
index 24b02080bd325..1e7d4c49eeeb7 100644
--- a/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelDAGToDAG.cpp
@@ -1047,10 +1047,10 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
Imm = SignExtend64<32>(Imm);
if (hasAllWUsers(Node) && isApplicableToPLI(Imm) &&
- Subtarget->hasStdExtP() && Subtarget->enablePExtCodeGen()) {
- // If its 4 packed 8 bit integer or 2 packed signed integer, we can simply
- // copy lower 32 bits to higher 32 bits to make it able to rematerialize
- // to PLI_B or PLI_H
+ Subtarget->enablePExtCodeGen()) {
+    // If it's 4 packed 8-bit integers or 2 packed signed 16-bit integers, we
+    // can simply copy the lower 32 bits into the upper 32 bits so the value
+    // can rematerialize as PLI_B or PLI_H
Imm = ((uint64_t)Imm << 32) | (Imm & 0xFFFFFFFF);
}
@@ -2674,7 +2674,7 @@ void RISCVDAGToDAGISel::Select(SDNode *Node) {
CurDAG->RemoveDeadNode(Node);
return;
}
- if (Subtarget->hasStdExtP() && Subtarget->enablePExtCodeGen()) {
+ if (Subtarget->enablePExtCodeGen()) {
if (((VT == MVT::v4i16 || VT == MVT::v8i8) && SrcVT == MVT::i64) ||
((SrcVT == MVT::v4i16 || SrcVT == MVT::v8i8) && VT == MVT::i64)) {
ReplaceUses(SDValue(Node, 0), Node->getOperand(0));
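The ReplaceUses call above makes bitcasts between i64 and the 64-bit packed vector types free, since both sides already live in GPRs. A small hand-written IR sketch of what this covers (assuming RV64 with P extension codegen enabled; the function name is illustrative):

    define i64 @bitcast_v4i16_to_i64(<4 x i16> %v) {
      %r = bitcast <4 x i16> %v to i64
      ret i64 %r
    }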
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a2593f52cef30..2535cf89a1737 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -280,7 +280,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
}
// fixed vector is stored in GPRs for P extension packed operations
- if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
+ if (Subtarget.enablePExtCodeGen()) {
if (Subtarget.is64Bit()) {
addRegisterClass(MVT::v2i32, &RISCV::GPRRegClass);
addRegisterClass(MVT::v4i16, &RISCV::GPRRegClass);
@@ -499,7 +499,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
ISD::FTRUNC, ISD::FRINT, ISD::FROUND,
ISD::FROUNDEVEN, ISD::FCANONICALIZE};
- if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
+ if (Subtarget.enablePExtCodeGen()) {
setTargetDAGCombine(ISD::TRUNCATE);
setTruncStoreAction(MVT::v2i32, MVT::v2i16, Expand);
setTruncStoreAction(MVT::v4i16, MVT::v4i8, Expand);
@@ -1748,8 +1748,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
TargetLoweringBase::LegalizeTypeAction
RISCVTargetLowering::getPreferredVectorAction(MVT VT) const {
- if (Subtarget.hasStdExtP() && Subtarget.is64Bit() &&
- Subtarget.enablePExtCodeGen())
+ if (Subtarget.is64Bit() && Subtarget.enablePExtCodeGen())
if (VT == MVT::v2i16 || VT == MVT::v4i8)
return TypeWidenVector;
@@ -4372,7 +4371,7 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG,
SDLoc DL(Op);
// Handle P extension packed vector BUILD_VECTOR with PLI for splat constants
- if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
+ if (Subtarget.enablePExtCodeGen()) {
bool IsPExtVector =
(VT == MVT::v2i16 || VT == MVT::v4i8) ||
(Subtarget.is64Bit() &&
@@ -7553,7 +7552,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
return DAG.getNode(RISCVISD::BuildPairF64, DL, MVT::f64, Lo, Hi);
}
- if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
+ if (Subtarget.enablePExtCodeGen()) {
bool Is32BitCast =
(VT == MVT::i32 && (Op0VT == MVT::v4i8 || Op0VT == MVT::v2i16)) ||
(Op0VT == MVT::i32 && (VT == MVT::v4i8 || VT == MVT::v2i16));
@@ -8238,7 +8237,7 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
auto *Store = cast<StoreSDNode>(Op);
SDValue StoredVal = Store->getValue();
EVT VT = StoredVal.getValueType();
- if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen()) {
+ if (Subtarget.enablePExtCodeGen()) {
if (VT == MVT::v2i16 || VT == MVT::v4i8) {
SDValue DL(Op);
SDValue Cast = DAG.getBitcast(MVT::i32, StoredVal);
@@ -10531,8 +10530,7 @@ SDValue RISCVTargetLowering::lowerEXTRACT_VECTOR_ELT(SDValue Op,
return DAG.getNode(RISCVISD::FMV_H_X, DL, EltVT, IntExtract);
}
- if (Subtarget.hasStdExtP() && Subtarget.enablePExtCodeGen() &&
- VecVT.isFixedLengthVector()) {
+ if (Subtarget.enablePExtCodeGen() && VecVT.isFixedLengthVector()) {
if (VecVT != MVT::v4i16 && VecVT != MVT::v2i16 && VecVT != MVT::v8i8 &&
VecVT != MVT::v4i8 && VecVT != MVT::v2i32)
return SDValue();
@@ -14694,8 +14692,7 @@ void RISCVTargetLowering::ReplaceNodeResults(SDNode *N,
return;
}
- if (Subtarget.hasStdExtP() && Subtarget.is64Bit() &&
- Subtarget.enablePExtCodeGen()) {
+ if (Subtarget.is64Bit() && Subtarget.enablePExtCodeGen()) {
SDLoc DL(N);
SDValue ExtLoad =
DAG.getExtLoad(ISD::SEXTLOAD, DL, MVT::i64, Ld->getChain(),
@@ -16262,8 +16259,7 @@ static SDValue performTRUNCATECombine(SDNode *N, SelectionDAG &DAG,
SDValue N0 = N->getOperand(0);
EVT VT = N->getValueType(0);
- if (Subtarget.hasStdExtP() && VT.isFixedLengthVector() &&
- Subtarget.enablePExtCodeGen())
+ if (VT.isFixedLengthVector() && Subtarget.enablePExtCodeGen())
return combinePExtTruncate(N, DAG, Subtarget);
// Pre-promote (i1 (truncate (srl X, Y))) on RV64 with Zbs without zero
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
index 715ac4cedc649..35f95093c6b10 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.cpp
@@ -69,6 +69,12 @@ static cl::opt<bool> UseMIPSCCMovInsn("use-riscv-mips-ccmov",
cl::desc("Use 'mips.ccmov' instruction"),
cl::init(true), cl::Hidden);
+static cl::opt<bool> EnablePExtCodeGen(
+ "enable-p-ext-codegen",
+ cl::desc("Turn on P Extension codegen(This is a temporary switch where "
+ "only partial codegen is currently supported)"),
+ cl::init(false), cl::Hidden);
+
void RISCVSubtarget::anchor() {}
RISCVSubtarget &
@@ -145,6 +151,10 @@ bool RISCVSubtarget::useConstantPoolForLargeInts() const {
return !RISCVDisableUsingConstantPoolForLargeInts;
}
+bool RISCVSubtarget::enablePExtCodeGen() const {
+ return HasStdExtP && EnablePExtCodeGen;
+}
+
unsigned RISCVSubtarget::getMaxBuildIntsCost() const {
// Loading integer from constant pool needs two instructions (the reason why
// the minimum cost is 2): an address calculation instruction and a load
diff --git a/llvm/lib/Target/RISCV/RISCVSubtarget.h b/llvm/lib/Target/RISCV/RISCVSubtarget.h
index 4b4fc8f0d8e76..f5e17ebcfc92f 100644
--- a/llvm/lib/Target/RISCV/RISCVSubtarget.h
+++ b/llvm/lib/Target/RISCV/RISCVSubtarget.h
@@ -322,6 +322,8 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
}
}
+ bool enablePExtCodeGen() const;
+
// Returns VLEN divided by DLEN. Where DLEN is the datapath width of the
// vector hardware implementation which may be less than VLEN.
unsigned getDLenFactor() const {
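With the subtarget feature removed, the temporary switch is a hidden llc option that is only honored when the P extension is also enabled (see enablePExtCodeGen() above). Since it defaults to false, plain -mattr=+experimental-p builds are unaffected. A minimal RUN-line sketch mirroring the updated tests below:

    ; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s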
diff --git a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
index b72ebb39ee43a..b7aa5357a7f84 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetTransformInfo.cpp
@@ -972,7 +972,7 @@ InstructionCost RISCVTTIImpl::getScalarizationOverhead(
// TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
// For now, skip all fixed vector cost analysis when P extension is available
// to avoid crashes in getMinRVVVectorSizeInBits()
- if (ST->hasStdExtP() && ST->enablePExtCodeGen() && isa<FixedVectorType>(Ty)) {
+ if (ST->enablePExtCodeGen() && isa<FixedVectorType>(Ty)) {
return 1; // Treat as single instruction cost for now
}
@@ -1635,7 +1635,7 @@ InstructionCost RISCVTTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst,
// TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
// For now, skip all fixed vector cost analysis when P extension is available
// to avoid crashes in getMinRVVVectorSizeInBits()
- if (ST->hasStdExtP() && ST->enablePExtCodeGen() &&
+ if (ST->enablePExtCodeGen() &&
(isa<FixedVectorType>(Dst) || isa<FixedVectorType>(Src))) {
return 1; // Treat as single instruction cost for now
}
@@ -2339,8 +2339,7 @@ InstructionCost RISCVTTIImpl::getVectorInstrCost(unsigned Opcode, Type *Val,
// TODO: Add proper cost model for P extension fixed vectors (e.g., v4i16)
// For now, skip all fixed vector cost analysis when P extension is available
// to avoid crashes in getMinRVVVectorSizeInBits()
- if (ST->hasStdExtP() && ST->enablePExtCodeGen() &&
- isa<FixedVectorType>(Val)) {
+ if (ST->enablePExtCodeGen() && isa<FixedVectorType>(Val)) {
return 1; // Treat as single instruction cost for now
}
diff --git a/llvm/test/CodeGen/RISCV/features-info.ll b/llvm/test/CodeGen/RISCV/features-info.ll
index 5ba6a364f9b19..988d0490afeb6 100644
--- a/llvm/test/CodeGen/RISCV/features-info.ll
+++ b/llvm/test/CodeGen/RISCV/features-info.ll
@@ -23,7 +23,6 @@
; CHECK-NEXT: disable-postmisched-store-clustering - Disable PostRA store clustering in the machine scheduler.
; CHECK-NEXT: dlen-factor-2 - Vector unit DLEN(data path width) is half of VLEN.
; CHECK-NEXT: e - 'E' (Embedded Instruction Set with 16 GPRs).
-; CHECK-NEXT: enable-p-ext-codegen - Turn on P Extension codegen(This is a temporary switch where only partial codegen is currently supported).
; CHECK-NEXT: exact-asm - Enable Exact Assembly (Disables Compression and Relaxation).
; CHECK-NEXT: experimental - Experimental intrinsics.
; CHECK-NEXT: experimental-p - 'P' ('Base P' (Packed SIMD)).
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 418d17ece403b..8bb7347ce0589 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv32 -mattr=+experimental-p,+enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-RV32 %s
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-RV64 %s
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-p -enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-RV32 %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck --check-prefixes=CHECK,CHECK-RV64 %s
; Test basic add/sub operations for v2i16
define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index eb81ab31a107e..b1bbfcdbc56e6 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
-; RUN: llc -mtriple=riscv64 -mattr=+experimental-p,+enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-p -enable-p-ext-codegen -verify-machineinstrs < %s | FileCheck %s
; Test basic add/sub operations for v4i16
define void @test_padd_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {