[llvm] [RISCV][llvm] Support min/max codegen for P extension (PR #175494)
Brandon Wu via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 11 23:34:19 PST 2026
https://github.com/4vtomat created https://github.com/llvm/llvm-project/pull/175494
None
>From 10dc41ca4bedc75f97e52cba6e685a463d2d3ef1 Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Sun, 11 Jan 2026 23:32:14 -0800
Subject: [PATCH] [RISCV][llvm] Support min/max codegen for P extension
---
llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 2 +
llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 16 ++
llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 121 +++++++++++++
llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 181 ++++++++++++++++++++
4 files changed, 320 insertions(+)
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index db1973fc27326..26e4055aaf6a2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -554,6 +554,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
ISD::SDIVREM, ISD::UDIVREM},
VTs, Expand);
+ setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, VTs,
+ Legal);
setOperationAction(ISD::SETCC, VTs, Legal);
setCondCodeAction({ISD::SETNE, ISD::SETGT, ISD::SETGE, ISD::SETUGT,
ISD::SETUGE, ISD::SETULE, ISD::SETLE},
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index d2b122df62264..b3ad517fc40f9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1612,6 +1612,16 @@ let Predicates = [HasStdExtP] in {
(PMSLT_H GPR:$rs1, GPR:$rs2)>;
def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
(PMSLTU_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8/16-bit [s|u]min/[s|u]max patterns
+ def: Pat<(XLenVecI8VT (smin GPR:$rs1, GPR:$rs2)), (PMIN_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (umin GPR:$rs1, GPR:$rs2)), (PMINU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (smin GPR:$rs1, GPR:$rs2)), (PMIN_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (umin GPR:$rs1, GPR:$rs2)), (PMINU_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (smax GPR:$rs1, GPR:$rs2)), (PMAX_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (umax GPR:$rs1, GPR:$rs2)), (PMAXU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (smax GPR:$rs1, GPR:$rs2)), (PMAX_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (umax GPR:$rs1, GPR:$rs2)), (PMAXU_H GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtP]
let Predicates = [HasStdExtP, IsRV32] in {
@@ -1710,6 +1720,12 @@ let Predicates = [HasStdExtP, IsRV64] in {
def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETUGT)),
(PMSLTU_W GPR:$rs1, GPR:$rs2)>;
+ // 32-bit [s|u]min/[s|u]max patterns
+ def: Pat<(v2i32 (smin GPR:$rs1, GPR:$rs2)), (PMIN_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (umin GPR:$rs1, GPR:$rs2)), (PMINU_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (smax GPR:$rs1, GPR:$rs2)), (PMAX_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (umax GPR:$rs1, GPR:$rs2)), (PMAXU_W GPR:$rs1, GPR:$rs2)>;
+
// 32-bit logical shift left/right patterns
def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
(PSLLI_W GPR:$rs1, uimm5:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 10964a64b576a..bccb1a1a4e345 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -2019,3 +2019,124 @@ define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
store <4 x i8> %res, ptr %ret_ptr
ret void
}
+
+; Test 8/16-bit [s|u]min/[s|u]max
+define void @test_smin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmin.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %a, <2 x i16> %b)
+ store <2 x i16> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pminu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %min = call <2 x i16> @llvm.umin.v2i16(<2 x i16> %a, <2 x i16> %b)
+ store <2 x i16> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmin.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %min = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %a, <4 x i8> %b)
+ store <4 x i8> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pminu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %min = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %a, <4 x i8> %b)
+ store <4 x i8> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmax.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> %b)
+ store <2 x i16> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmaxu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %max = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %a, <2 x i16> %b)
+ store <2 x i16> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmax.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %max = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %a, <4 x i8> %b)
+ store <4 x i8> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmaxu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %max = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %a, <4 x i8> %b)
+ store <4 x i8> %max, ptr %ret_ptr
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 48eda85f375cb..83cba8f7b0bf0 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -2310,3 +2310,184 @@ define void @test_uge_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
store <2 x i32> %res, ptr %ret_ptr
ret void
}
+
+; Test 8/16/32-bit [s|u]min/[s|u]max
+define void @test_smin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmin.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %min = call <4 x i16> @llvm.smin.v2i16(<4 x i16> %a, <4 x i16> %b)
+ store <4 x i16> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pminu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %min = call <4 x i16> @llvm.umin.v2i16(<4 x i16> %a, <4 x i16> %b)
+ store <4 x i16> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmin.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %min = call <8 x i8> @llvm.smin.v4i8(<8 x i8> %a, <8 x i8> %b)
+ store <8 x i8> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pminu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %min = call <8 x i8> @llvm.umin.v4i8(<8 x i8> %a, <8 x i8> %b)
+ store <8 x i8> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smin_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmin.w a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %min = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b)
+ store <2 x i32> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pminu.w a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %min = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b)
+ store <2 x i32> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmax.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %max = call <4 x i16> @llvm.smax.v2i16(<4 x i16> %a, <4 x i16> %b)
+ store <4 x i16> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmaxu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %max = call <4 x i16> @llvm.umax.v2i16(<4 x i16> %a, <4 x i16> %b)
+ store <4 x i16> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmax.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %max = call <8 x i8> @llvm.smax.v4i8(<8 x i8> %a, <8 x i8> %b)
+ store <8 x i8> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmaxu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %max = call <8 x i8> @llvm.umax.v4i8(<8 x i8> %a, <8 x i8> %b)
+ store <8 x i8> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmax.w a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %max = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b)
+ store <2 x i32> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmaxu.w a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %max = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b)
+ store <2 x i32> %max, ptr %ret_ptr
+ ret void
+}
More information about the llvm-commits
mailing list