[llvm] [RISCV][llvm] Support logical comparison codegen for P extension (PR #174626)

Brandon Wu via llvm-commits llvm-commits at lists.llvm.org
Sun Jan 11 22:13:30 PST 2026


https://github.com/4vtomat updated https://github.com/llvm/llvm-project/pull/174626

>From 7fd88ebbb6e09201f747c8c89840e90d0cd931df Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Tue, 6 Jan 2026 08:54:01 -0800
Subject: [PATCH 1/3] [RISCV][llvm] Support logical comparison codegen for P
 extension

The result type of the P extension's comparison instructions is the same as
the operand type, and each result element is either all 1s or all 0s, so we
need to set ZeroOrNegativeOneBooleanContent so that sext(setcc) is combined
automatically.
---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |   3 +
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td    |  76 +++
 llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll     | 332 +++++++++++++
 llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll     | 498 ++++++++++++++++++++
 4 files changed, 909 insertions(+)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index d6b62736bdf60..43598eec5137d 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -546,6 +546,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction({ISD::SHL, ISD::SRL, ISD::SRA}, VTs, Custom);
     setOperationAction(ISD::BITCAST, VTs, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom);
+    setOperationAction(ISD::SETCC, VTs, Legal);
+    // P extension vector comparisons produce all 1s for true, all 0s for false.
+    setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
   }
 
   if (Subtarget.hasStdExtZfbfmin()) {
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 6830b476f5cd3..7a68707336050 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1584,6 +1584,54 @@ let Predicates = [HasStdExtP] in {
   // // splat pattern
   def: Pat<(XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))), (PADD_BS (XLenVT X0), GPR:$rs2)>;
   def: Pat<(XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))), (PADD_HS (XLenVT X0), GPR:$rs2)>;
+
+  // 8/16-bit comparison patterns (result is all 1s or all 0s per element)
+  // a == b
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETEQ)),
+           (PMSEQ_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETEQ)),
+           (PMSEQ_H GPR:$rs1, GPR:$rs2)>;
+  // a != b => !(a == b)
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETNE)),
+           (XORI (PMSEQ_B GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETNE)),
+           (XORI (PMSEQ_H GPR:$rs1, GPR:$rs2), -1)>;
+  // a < b
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLT)),
+           (PMSLT_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETULT)),
+           (PMSLTU_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETLT)),
+           (PMSLT_H GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
+           (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
+  // a <= b => !(b < a)
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLE)),
+           (XORI (PMSLT_B GPR:$rs2, GPR:$rs1), -1)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETULE)),
+           (XORI (PMSLTU_B GPR:$rs2, GPR:$rs1), -1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETLE)),
+           (XORI (PMSLT_H GPR:$rs2, GPR:$rs1), -1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULE)),
+           (XORI (PMSLTU_H GPR:$rs2, GPR:$rs1), -1)>;
+  // a > b => b < a
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGT)),
+           (PMSLT_B GPR:$rs2, GPR:$rs1)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETUGT)),
+           (PMSLTU_B GPR:$rs2, GPR:$rs1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETGT)),
+           (PMSLT_H GPR:$rs2, GPR:$rs1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETUGT)),
+           (PMSLTU_H GPR:$rs2, GPR:$rs1)>;
+  // a >= b => !(a < b)
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGE)),
+           (XORI (PMSLT_B GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETUGE)),
+           (XORI (PMSLTU_B GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETGE)),
+           (XORI (PMSLT_H GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETUGE)),
+           (XORI (PMSLTU_H GPR:$rs1, GPR:$rs2), -1)>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -1643,6 +1691,34 @@ let Predicates = [HasStdExtP, IsRV64] in {
   // splat pattern
   def: Pat<(v2i32 (splat_vector (XLenVT GPR:$rs2))), (PADD_WS (XLenVT X0), GPR:$rs2)>;
 
+  // 32-bit comparison patterns (result is all 1s or all 0s per element)
+  // a == b
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETEQ)),
+           (PMSEQ_W GPR:$rs1, GPR:$rs2)>;
+  // a != b => !(a == b)
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETNE)),
+           (XORI (PMSEQ_W GPR:$rs1, GPR:$rs2), -1)>;
+  // a < b
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLT)),
+           (PMSLT_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULT)),
+           (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
+  // a <= b => !(b < a)
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLE)),
+           (XORI (PMSLT_W GPR:$rs2, GPR:$rs1), -1)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULE)),
+           (XORI (PMSLTU_W GPR:$rs2, GPR:$rs1), -1)>;
+  // a > b => b < a
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGT)),
+           (PMSLT_W GPR:$rs2, GPR:$rs1)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETUGT)),
+           (PMSLTU_W GPR:$rs2, GPR:$rs1)>;
+  // a >= b => !(a < b)
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGE)),
+           (XORI (PMSLT_W GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETUGE)),
+           (XORI (PMSLTU_W GPR:$rs1, GPR:$rs2), -1)>;
+
   // 32-bit logical shift left/right patterns
   def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),
            (PSLLI_W GPR:$rs1, uimm5:$shamt)>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index 2836cda16b6d9..a1728e72ce3b9 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -1118,3 +1118,335 @@ define void @test_pmulhsu_h_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
   store <2 x i16> %res, ptr %ret_ptr
   ret void
 }
+
+; Comparison operations for v2i16
+define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmseq.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp eq <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmseq.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp ne <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp slt <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp sle <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a2, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp sgt <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp sge <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp ult <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp ule <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a2, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp ugt <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i16>, ptr %a_ptr
+  %b = load <2 x i16>, ptr %b_ptr
+  %cmp = icmp uge <2 x i16> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i16>
+  store <2 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+; Comparison operations for v4i8
+define void @test_eq_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmseq.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp eq <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmseq.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp ne <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp slt <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp sle <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a2, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp sgt <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp sge <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a1, a2
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp ult <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp ule <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a2, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp ugt <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    lw a1, 0(a1)
+; CHECK-NEXT:    lw a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sw a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i8>, ptr %a_ptr
+  %b = load <4 x i8>, ptr %b_ptr
+  %cmp = icmp uge <4 x i8> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i8>
+  store <4 x i8> %res, ptr %ret_ptr
+  ret void
+}
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index dfa1b242e656f..e08871b4e63bf 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -1148,3 +1148,501 @@ define void @test_pmulhsu_w_commuted(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
   store <2 x i32> %res, ptr %ret_ptr
   ret void
 }
+
+; Comparison operations for v4i16
+define void @test_eq_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp eq <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp ne <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp slt <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp sle <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp sgt <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp sge <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp ult <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp ule <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp ugt <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_h(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_h:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.h a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <4 x i16>, ptr %a_ptr
+  %b = load <4 x i16>, ptr %b_ptr
+  %cmp = icmp uge <4 x i16> %a, %b
+  %res = sext <4 x i1> %cmp to <4 x i16>
+  store <4 x i16> %res, ptr %ret_ptr
+  ret void
+}
+
+; Comparison operations for v8i8
+define void @test_eq_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp eq <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp ne <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp slt <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp sle <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp sgt <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp sge <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp ult <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp ule <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp ugt <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_b(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_b:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.b a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <8 x i8>, ptr %a_ptr
+  %b = load <8 x i8>, ptr %b_ptr
+  %cmp = icmp uge <8 x i8> %a, %b
+  %res = sext <8 x i1> %cmp to <8 x i8>
+  store <8 x i8> %res, ptr %ret_ptr
+  ret void
+}
+
+; Comparison operations for v2i32
+define void @test_eq_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_eq_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp eq <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ne_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ne_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmseq.w a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp ne <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_slt_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_slt_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp slt <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sle_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sle_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.w a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp sle <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sgt_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sgt_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.w a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp sgt <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_sge_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_sge_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmslt.w a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp sge <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ult_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ult_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.w a1, a1, a2
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp ult <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ule_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ule_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.w a1, a2, a1
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp ule <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_ugt_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_ugt_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.w a1, a2, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp ugt <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}
+
+define void @test_uge_w(ptr %ret_ptr, ptr %a_ptr, ptr %b_ptr) {
+; CHECK-LABEL: test_uge_w:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    ld a1, 0(a1)
+; CHECK-NEXT:    ld a2, 0(a2)
+; CHECK-NEXT:    pmsltu.w a1, a1, a2
+; CHECK-NEXT:    not a1, a1
+; CHECK-NEXT:    sd a1, 0(a0)
+; CHECK-NEXT:    ret
+  %a = load <2 x i32>, ptr %a_ptr
+  %b = load <2 x i32>, ptr %b_ptr
+  %cmp = icmp uge <2 x i32> %a, %b
+  %res = sext <2 x i1> %cmp to <2 x i32>
+  store <2 x i32> %res, ptr %ret_ptr
+  ret void
+}

>From ebdfb3c1c8716b9458c00f00cfb6302f93bce6d6 Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Wed, 7 Jan 2026 00:08:47 -0800
Subject: [PATCH 2/3] fixup! reorder pattern

---
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 48 ++++++++++++------------
 1 file changed, 24 insertions(+), 24 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 7a68707336050..d07cee12a13bf 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1606,23 +1606,23 @@ let Predicates = [HasStdExtP] in {
   def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
            (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
   // a <= b => !(b < a)
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLE)),
-           (XORI (PMSLT_B GPR:$rs2, GPR:$rs1), -1)>;
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETULE)),
-           (XORI (PMSLTU_B GPR:$rs2, GPR:$rs1), -1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETLE)),
-           (XORI (PMSLT_H GPR:$rs2, GPR:$rs1), -1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULE)),
-           (XORI (PMSLTU_H GPR:$rs2, GPR:$rs1), -1)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs2), (XLenVecI8VT GPR:$rs1), SETLE)),
+           (XORI (PMSLT_B GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs2), (XLenVecI8VT GPR:$rs1), SETULE)),
+           (XORI (PMSLTU_B GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs2), (XLenVecI16VT GPR:$rs1), SETLE)),
+           (XORI (PMSLT_H GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs2), (XLenVecI16VT GPR:$rs1), SETULE)),
+           (XORI (PMSLTU_H GPR:$rs1, GPR:$rs2), -1)>;
   // a > b => b < a
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGT)),
-           (PMSLT_B GPR:$rs2, GPR:$rs1)>;
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETUGT)),
-           (PMSLTU_B GPR:$rs2, GPR:$rs1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETGT)),
-           (PMSLT_H GPR:$rs2, GPR:$rs1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETUGT)),
-           (PMSLTU_H GPR:$rs2, GPR:$rs1)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs2), (XLenVecI8VT GPR:$rs1), SETGT)),
+           (PMSLT_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs2), (XLenVecI8VT GPR:$rs1), SETUGT)),
+           (PMSLTU_B GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs2), (XLenVecI16VT GPR:$rs1), SETGT)),
+           (PMSLT_H GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs2), (XLenVecI16VT GPR:$rs1), SETUGT)),
+           (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
   // a >= b => !(a < b)
   def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGE)),
            (XORI (PMSLT_B GPR:$rs1, GPR:$rs2), -1)>;
@@ -1704,15 +1704,15 @@ let Predicates = [HasStdExtP, IsRV64] in {
   def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULT)),
            (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
   // a <= b => !(b < a)
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLE)),
-           (XORI (PMSLT_W GPR:$rs2, GPR:$rs1), -1)>;
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULE)),
-           (XORI (PMSLTU_W GPR:$rs2, GPR:$rs1), -1)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETLE)),
+           (XORI (PMSLT_W GPR:$rs1, GPR:$rs2), -1)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETULE)),
+           (XORI (PMSLTU_W GPR:$rs1, GPR:$rs2), -1)>;
   // a > b => b < a
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGT)),
-           (PMSLT_W GPR:$rs2, GPR:$rs1)>;
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETUGT)),
-           (PMSLTU_W GPR:$rs2, GPR:$rs1)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETGT)),
+           (PMSLT_W GPR:$rs1, GPR:$rs2)>;
+  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETUGT)),
+           (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
   // a >= b => !(a < b)
   def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGE)),
            (XORI (PMSLT_W GPR:$rs1, GPR:$rs2), -1)>;

>From d5d3d7f343aa6fd7e67e8aba159b22321d8f54dd Mon Sep 17 00:00:00 2001
From: Brandon Wu <brandon.wu at sifive.com>
Date: Wed, 7 Jan 2026 23:51:06 -0800
Subject: [PATCH 3/3] fixup! leave only beq, blt and bltu pattern

---
 llvm/lib/Target/RISCV/RISCVISelLowering.cpp |  3 ++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td    | 45 ---------------------
 2 files changed, 3 insertions(+), 45 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index a3a5432f32719..bbcf8c7476267 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -547,6 +547,9 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
     setOperationAction(ISD::BITCAST, VTs, Custom);
     setOperationAction(ISD::EXTRACT_VECTOR_ELT, VTs, Custom);
     setOperationAction(ISD::SETCC, VTs, Legal);
+    setCondCodeAction({ISD::SETNE, ISD::SETGT, ISD::SETGE, ISD::SETUGT,
+                       ISD::SETUGE, ISD::SETULE, ISD::SETLE},
+                      VTs, Expand);
     // P extension vector comparisons produce all 1s for true, all 0s for false
     setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
   }
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 19fbd17518af4..4fccedf9749c3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1603,11 +1603,6 @@ let Predicates = [HasStdExtP] in {
            (PMSEQ_B GPR:$rs1, GPR:$rs2)>;
   def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETEQ)),
            (PMSEQ_H GPR:$rs1, GPR:$rs2)>;
-  // a != b => !(a == b)
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETNE)),
-           (XORI (PMSEQ_B GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETNE)),
-           (XORI (PMSEQ_H GPR:$rs1, GPR:$rs2), -1)>;
   // a < b
   def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETLT)),
            (PMSLT_B GPR:$rs1, GPR:$rs2)>;
@@ -1617,33 +1612,6 @@ let Predicates = [HasStdExtP] in {
            (PMSLT_H GPR:$rs1, GPR:$rs2)>;
   def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETULT)),
            (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
-  // a <= b => !(b < a)
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs2), (XLenVecI8VT GPR:$rs1), SETLE)),
-           (XORI (PMSLT_B GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs2), (XLenVecI8VT GPR:$rs1), SETULE)),
-           (XORI (PMSLTU_B GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs2), (XLenVecI16VT GPR:$rs1), SETLE)),
-           (XORI (PMSLT_H GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs2), (XLenVecI16VT GPR:$rs1), SETULE)),
-           (XORI (PMSLTU_H GPR:$rs1, GPR:$rs2), -1)>;
-  // a > b => b < a
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs2), (XLenVecI8VT GPR:$rs1), SETGT)),
-           (PMSLT_B GPR:$rs1, GPR:$rs2)>;
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs2), (XLenVecI8VT GPR:$rs1), SETUGT)),
-           (PMSLTU_B GPR:$rs1, GPR:$rs2)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs2), (XLenVecI16VT GPR:$rs1), SETGT)),
-           (PMSLT_H GPR:$rs1, GPR:$rs2)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs2), (XLenVecI16VT GPR:$rs1), SETUGT)),
-           (PMSLTU_H GPR:$rs1, GPR:$rs2)>;
-  // a >= b => !(a < b)
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETGE)),
-           (XORI (PMSLT_B GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(XLenVecI8VT (setcc (XLenVecI8VT GPR:$rs1), (XLenVecI8VT GPR:$rs2), SETUGE)),
-           (XORI (PMSLTU_B GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETGE)),
-           (XORI (PMSLT_H GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(XLenVecI16VT (setcc (XLenVecI16VT GPR:$rs1), (XLenVecI16VT GPR:$rs2), SETUGE)),
-           (XORI (PMSLTU_H GPR:$rs1, GPR:$rs2), -1)>;
 } // Predicates = [HasStdExtP]
 
 let Predicates = [HasStdExtP, IsRV32] in {
@@ -1713,29 +1681,16 @@ let Predicates = [HasStdExtP, IsRV64] in {
   // a == b
   def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETEQ)),
            (PMSEQ_W GPR:$rs1, GPR:$rs2)>;
-  // a != b => !(a == b)
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETNE)),
-           (XORI (PMSEQ_W GPR:$rs1, GPR:$rs2), -1)>;
   // a < b
   def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETLT)),
            (PMSLT_W GPR:$rs1, GPR:$rs2)>;
   def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETULT)),
            (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
-  // a <= b => !(b < a)
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETLE)),
-           (XORI (PMSLT_W GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETULE)),
-           (XORI (PMSLTU_W GPR:$rs1, GPR:$rs2), -1)>;
   // a > b => b < a
   def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETGT)),
            (PMSLT_W GPR:$rs1, GPR:$rs2)>;
   def: Pat<(v2i32 (setcc (v2i32 GPR:$rs2), (v2i32 GPR:$rs1), SETUGT)),
            (PMSLTU_W GPR:$rs1, GPR:$rs2)>;
-  // a >= b => !(a < b)
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETGE)),
-           (XORI (PMSLT_W GPR:$rs1, GPR:$rs2), -1)>;
-  def: Pat<(v2i32 (setcc (v2i32 GPR:$rs1), (v2i32 GPR:$rs2), SETUGE)),
-           (XORI (PMSLTU_W GPR:$rs1, GPR:$rs2), -1)>;
 
   // 32-bit logical shift left/right patterns
   def: Pat<(v2i32 (shl GPR:$rs1, (v2i32 (splat_vector uimm5:$shamt)))),



More information about the llvm-commits mailing list