[llvm] [RISCV][llvm] Support min/max codegen for P extension (PR #175494)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 11 23:34:47 PST 2026
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-risc-v
Author: Brandon Wu (4vtomat)
<details>
<summary>Changes</summary>
---
Full diff: https://github.com/llvm/llvm-project/pull/175494.diff
4 Files Affected:
- (modified) llvm/lib/Target/RISCV/RISCVISelLowering.cpp (+2)
- (modified) llvm/lib/Target/RISCV/RISCVInstrInfoP.td (+16)
- (modified) llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll (+121)
- (modified) llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll (+181)
``````````diff
diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index db1973fc27326..26e4055aaf6a2 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -554,6 +554,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
setOperationAction({ISD::SDIV, ISD::UDIV, ISD::SREM, ISD::UREM,
ISD::SDIVREM, ISD::UDIVREM},
VTs, Expand);
+ setOperationAction({ISD::SMIN, ISD::UMIN, ISD::SMAX, ISD::UMAX}, VTs,
+ Legal);
setOperationAction(ISD::SETCC, VTs, Legal);
setCondCodeAction({ISD::SETNE, ISD::SETGT, ISD::SETGE, ISD::SETUGT,
ISD::SETUGE, ISD::SETULE, ISD::SETLE},
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index d2b122df62264..b3ad517fc40f9 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1612,6 +1612,16 @@ let Predicates = [HasStdExtP] in {
(PMSLT_H GPR:$rs1, GPR:$rs2)>;
def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
(PMSLTU_H GPR:$rs1, GPR:$rs2)>;
+
+ // 8/16-bit [s|u]min/[s|u]max patterns
+ def: Pat<(XLenVecI8VT (smin GPR:$rs1, GPR:$rs2)), (PMIN_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (umin GPR:$rs1, GPR:$rs2)), (PMINU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (smin GPR:$rs1, GPR:$rs2)), (PMIN_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (umin GPR:$rs1, GPR:$rs2)), (PMINU_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (smax GPR:$rs1, GPR:$rs2)), (PMAX_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI8VT (umax GPR:$rs1, GPR:$rs2)), (PMAXU_B GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (smax GPR:$rs1, GPR:$rs2)), (PMAX_H GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(XLenVecI16VT (umax GPR:$rs1, GPR:$rs2)), (PMAXU_H GPR:$rs1, GPR:$rs2)>;
} // Predicates = [HasStdExtP]
let Predicates = [HasStdExtP, IsRV32] in {
@@ -1710,6 +1720,12 @@ let Predicates = [HasStdExtP, IsRV64] in {
def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETUGT)),
(PMSLTU_W GPR:$rs1, GPR:$rs2)>;
+ // 32-bit [s|u]min/[s|u]max patterns
+ def: Pat<(v2i32 (smin GPR:$rs1, GPR:$rs2)), (PMIN_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (umin GPR:$rs1, GPR:$rs2)), (PMINU_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (smax GPR:$rs1, GPR:$rs2)), (PMAX_W GPR:$rs1, GPR:$rs2)>;
+ def: Pat<(v2i32 (umax GPR:$rs1, GPR:$rs2)), (PMAXU_W GPR:$rs1, GPR:$rs2)>;
+
// 32-bit logical shift left/right patterns
def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
(PSLLI_W GPR:$rs1, uimm5:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 10964a64b576a..bccb1a1a4e345 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -2019,3 +2019,124 @@ define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
store <4 x i8> %res, ptr %ret_ptr
ret void
}
+
+; Test 8/16-bit [s|u]min/[s|u]max
+define void @test_smin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmin.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %min = call <2 x i16> @llvm.smin.v2i16(<2 x i16> %a, <2 x i16> %b) ; signed min per i16 lane; the assertions above expect a single pmin.h
+ store <2 x i16> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pminu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %min = call <2 x i16> @llvm.umin.v2i16(<2 x i16> %a, <2 x i16> %b) ; unsigned min per i16 lane; the assertions above expect a single pminu.h
+ store <2 x i16> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmin.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %min = call <4 x i8> @llvm.smin.v4i8(<4 x i8> %a, <4 x i8> %b) ; signed min per i8 lane; the assertions above expect a single pmin.b
+ store <4 x i8> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pminu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %min = call <4 x i8> @llvm.umin.v4i8(<4 x i8> %a, <4 x i8> %b) ; unsigned min per i8 lane; the assertions above expect a single pminu.b
+ store <4 x i8> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmax.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %max = call <2 x i16> @llvm.smax.v2i16(<2 x i16> %a, <2 x i16> %b) ; signed max per i16 lane; the assertions above expect a single pmax.h
+ store <2 x i16> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmaxu.h a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i16>, ptr %a_ptr
+ %b = load <2 x i16>, ptr %b_ptr
+ %max = call <2 x i16> @llvm.umax.v2i16(<2 x i16> %a, <2 x i16> %b) ; unsigned max per i16 lane; the assertions above expect a single pmaxu.h
+ store <2 x i16> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmax.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %max = call <4 x i8> @llvm.smax.v4i8(<4 x i8> %a, <4 x i8> %b) ; signed max per i8 lane; the assertions above expect a single pmax.b
+ store <4 x i8> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: lw a1, 0(a1)
+; CHECK-NEXT: lw a2, 0(a2)
+; CHECK-NEXT: pmaxu.b a1, a1, a2
+; CHECK-NEXT: sw a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i8>, ptr %a_ptr
+ %b = load <4 x i8>, ptr %b_ptr
+ %max = call <4 x i8> @llvm.umax.v4i8(<4 x i8> %a, <4 x i8> %b) ; unsigned max per i8 lane; the assertions above expect a single pmaxu.b
+ store <4 x i8> %max, ptr %ret_ptr
+ ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 48eda85f375cb..83cba8f7b0bf0 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -2310,3 +2310,184 @@ define void @test_uge_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
store <2 x i32> %res, ptr %ret_ptr
ret void
}
+
+; Test 8/16/32-bit [s|u]min/[s|u]max
+define void @test_smin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmin.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %min = call <4 x i16> @llvm.smin.v4i16(<4 x i16> %a, <4 x i16> %b) ; signed min per i16 lane; .v4i16 suffix matches the <4 x i16> operands
+ store <4 x i16> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pminu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %min = call <4 x i16> @llvm.umin.v4i16(<4 x i16> %a, <4 x i16> %b) ; unsigned min per i16 lane; .v4i16 suffix matches the <4 x i16> operands
+ store <4 x i16> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmin.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %min = call <8 x i8> @llvm.smin.v8i8(<8 x i8> %a, <8 x i8> %b) ; signed min per i8 lane; .v8i8 suffix matches the <8 x i8> operands
+ store <8 x i8> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pminu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %min = call <8 x i8> @llvm.umin.v8i8(<8 x i8> %a, <8 x i8> %b) ; unsigned min per i8 lane; .v8i8 suffix matches the <8 x i8> operands
+ store <8 x i8> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smin_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smin_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmin.w a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %min = call <2 x i32> @llvm.smin.v2i32(<2 x i32> %a, <2 x i32> %b) ; signed min per i32 lane; the assertions above expect a single pmin.w
+ store <2 x i32> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umin_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umin_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pminu.w a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %min = call <2 x i32> @llvm.umin.v2i32(<2 x i32> %a, <2 x i32> %b) ; unsigned min per i32 lane; the assertions above expect a single pminu.w
+ store <2 x i32> %min, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmax.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %max = call <4 x i16> @llvm.smax.v4i16(<4 x i16> %a, <4 x i16> %b) ; signed max per i16 lane; .v4i16 suffix matches the <4 x i16> operands
+ store <4 x i16> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_h:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmaxu.h a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <4 x i16>, ptr %a_ptr
+ %b = load <4 x i16>, ptr %b_ptr
+ %max = call <4 x i16> @llvm.umax.v4i16(<4 x i16> %a, <4 x i16> %b) ; unsigned max per i16 lane; .v4i16 suffix matches the <4 x i16> operands
+ store <4 x i16> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmax.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %max = call <8 x i8> @llvm.smax.v8i8(<8 x i8> %a, <8 x i8> %b) ; signed max per i8 lane; .v8i8 suffix matches the <8 x i8> operands
+ store <8 x i8> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_b:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmaxu.b a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <8 x i8>, ptr %a_ptr
+ %b = load <8 x i8>, ptr %b_ptr
+ %max = call <8 x i8> @llvm.umax.v8i8(<8 x i8> %a, <8 x i8> %b) ; unsigned max per i8 lane; .v8i8 suffix matches the <8 x i8> operands
+ store <8 x i8> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_smax_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_smax_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmax.w a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %max = call <2 x i32> @llvm.smax.v2i32(<2 x i32> %a, <2 x i32> %b) ; signed max per i32 lane; the assertions above expect a single pmax.w
+ store <2 x i32> %max, ptr %ret_ptr
+ ret void
+}
+
+define void @test_umax_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_umax_w:
+; CHECK: # %bb.0:
+; CHECK-NEXT: ld a1, 0(a1)
+; CHECK-NEXT: ld a2, 0(a2)
+; CHECK-NEXT: pmaxu.w a1, a1, a2
+; CHECK-NEXT: sd a1, 0(a0)
+; CHECK-NEXT: ret
+ %a = load <2 x i32>, ptr %a_ptr
+ %b = load <2 x i32>, ptr %b_ptr
+ %max = call <2 x i32> @llvm.umax.v2i32(<2 x i32> %a, <2 x i32> %b) ; unsigned max per i32 lane; the assertions above expect a single pmaxu.w
+ store <2 x i32> %max, ptr %ret_ptr
+ ret void
+}
``````````
</details>
https://github.com/llvm/llvm-project/pull/175494
More information about the llvm-commits
mailing list