[llvm] [llvm][RISCV] Support P Extension CodeGen (PR #190303)

Thu Apr 2 22:21:45 PDT 2026

https://github.com/sihuan updated https://github.com/llvm/llvm-project/pull/190303

>From 9884ac0bf2ab8e4815ad7c31ba015140e259e5a2 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 3 Apr 2026 03:15:29 +0000
Subject: [PATCH 1/4] [llvm][RISCV] Support P Extension CodeGen

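Add instruction-selection patterns that select `add(vec, splat(scalar))`
to the `PADD_*S` instructions (`padd.bs`, `padd.hs`, and, on RV64, `padd.ws`).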
---
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 24 +++++++++++++
 llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll  | 31 +++++++++++++---
 llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll  | 46 ++++++++++++++++++++----
 3 files changed, 91 insertions(+), 10 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index f6dfb856fef7b..62e2c158f8011 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1796,10 +1796,26 @@ let Predicates = [HasStdExtP] in {
   // Basic 8-bit arithmetic patterns
   def : PatGprGpr<add, PADD_B, XLenVecI8VT>;
   def : PatGprGpr<sub, PSUB_B, XLenVecI8VT>;
+  def : Pat<(XLenVecI8VT
+              (add (XLenVecI8VT GPR:$rs1),
+                   (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))),
+            (PADD_BS GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(XLenVecI8VT
+              (add (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))),
+                   (XLenVecI8VT GPR:$rs1))),
+            (PADD_BS GPR:$rs1, GPR:$rs2)>;
 
   // Basic 16-bit arithmetic patterns
   def : PatGprGpr<add, PADD_H, XLenVecI16VT>;
   def : PatGprGpr<sub, PSUB_H, XLenVecI16VT>;
+  def : Pat<(XLenVecI16VT
+              (add (XLenVecI16VT GPR:$rs1),
+                   (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))),
+            (PADD_HS GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(XLenVecI16VT
+              (add (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))),
+                   (XLenVecI16VT GPR:$rs1))),
+            (PADD_HS GPR:$rs1, GPR:$rs2)>;
 
   // 8-bit bitwise operation patterns
   def : PatGprGpr<and, AND, XLenVecI8VT>;
@@ -2014,6 +2030,14 @@ let Predicates = [HasStdExtP, IsRV64] in {
   // Basic 32-bit arithmetic patterns
   def : PatGprGpr<add, PADD_W, v2i32>;
   def : PatGprGpr<sub, PSUB_W, v2i32>;
+  def : Pat<(v2i32
+              (add (v2i32 GPR:$rs1),
+                   (v2i32 (splat_vector (XLenVT GPR:$rs2))))),
+            (PADD_WS GPR:$rs1, GPR:$rs2)>;
+  def : Pat<(v2i32
+              (add (v2i32 (splat_vector (XLenVT GPR:$rs2))),
+                   (v2i32 GPR:$rs1))),
+            (PADD_WS GPR:$rs1, GPR:$rs2)>;
 
   // 32-bit bitwise operation patterns
   def : PatGprGpr<and, AND, v2i32>;
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index e70eea7263325..ef7a64a0227a4 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -625,6 +625,29 @@ define <2 x i16> @test_non_const_splat_i16(i16 %elt) {
   ret <2 x i16> %splat
 }
 
+; Test add(vec, splat(scalar)) pattern
+define <4 x i8> @test_padd_bs_from_scalar_splat(<4 x i8> %a, i8 %b) {
+; CHECK-LABEL: test_padd_bs_from_scalar_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.bs a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <4 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer
+  %res = add <4 x i8> %splat, %a
+  ret <4 x i8> %res
+}
+
+define <2 x i16> @test_padd_hs_from_scalar_splat(<2 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_padd_hs_from_scalar_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.hs a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <2 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer
+  %res = add <2 x i16> %splat, %a
+  ret <2 x i16> %res
+}
+
 define <4 x i8> @test_build_vector_i8(i8 %a, i8 %c, i8 %b, i8 %d) {
 ; CHECK-RV32-LABEL: test_build_vector_i8:
 ; CHECK-RV32:       # %bb.0:
@@ -1918,10 +1941,10 @@ define <2 x i16> @test_select_v2i16(i1 %cond, <2 x i16> %a, <2 x i16> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB134_2
+; CHECK-NEXT:    bnez a3, .LBB136_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB134_2:
+; CHECK-NEXT:  .LBB136_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <2 x i16> %a, <2 x i16> %b
   ret <2 x i16> %res
@@ -1932,10 +1955,10 @@ define <4 x i8> @test_select_v4i8(i1 %cond, <4 x i8> %a, <4 x i8> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB135_2
+; CHECK-NEXT:    bnez a3, .LBB137_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB135_2:
+; CHECK-NEXT:  .LBB137_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <4 x i8> %a, <4 x i8> %b
   ret <4 x i8> %res
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index 2d1c6d737a640..a98509093e549 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -863,6 +863,40 @@ define <2 x i32> @test_non_const_splat_i32(i32 %elt) {
   ret <2 x i32> %splat
 }
 
+; Test add(vec, splat(scalar)) pattern
+define <8 x i8> @test_padd_bs_from_scalar_splat(<8 x i8> %a, i8 %b) {
+; CHECK-LABEL: test_padd_bs_from_scalar_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.bs a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <8 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
+  %res = add <8 x i8> %splat, %a
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_padd_hs_from_scalar_splat(<4 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_padd_hs_from_scalar_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.hs a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <4 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
+  %res = add <4 x i16> %splat, %a
+  ret <4 x i16> %res
+}
+
+define <2 x i32> @test_padd_ws_from_scalar_splat(<2 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_padd_ws_from_scalar_splat:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.ws a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <2 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
+  %res = add <2 x i32> %splat, %a
+  ret <2 x i32> %res
+}
+
 define <8 x i8> @test_build_vector_i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) {
 ; CHECK-LABEL: test_build_vector_i8:
 ; CHECK:       # %bb.0:
@@ -2440,10 +2474,10 @@ define <4 x i16> @test_select_v4i16(i1 %cond, <4 x i16> %a, <4 x i16> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB196_2
+; CHECK-NEXT:    bnez a3, .LBB199_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB196_2:
+; CHECK-NEXT:  .LBB199_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <4 x i16> %a, <4 x i16> %b
   ret <4 x i16> %res
@@ -2454,10 +2488,10 @@ define <8 x i8> @test_select_v8i8(i1 %cond, <8 x i8> %a, <8 x i8> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB197_2
+; CHECK-NEXT:    bnez a3, .LBB200_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB197_2:
+; CHECK-NEXT:  .LBB200_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <8 x i8> %a, <8 x i8> %b
   ret <8 x i8> %res
@@ -2468,10 +2502,10 @@ define <2 x i32> @test_select_v2i32(i1 %cond, <2 x i32> %a, <2 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB198_2
+; CHECK-NEXT:    bnez a3, .LBB201_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB198_2:
+; CHECK-NEXT:  .LBB201_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <2 x i32> %a, <2 x i32> %b
   ret <2 x i32> %res

>From 4d40f800b50b1b5e12c28be1414f6e4affdb0b6a Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 3 Apr 2026 04:41:01 +0000
Subject: [PATCH 2/4] Rename padd_*s tests to *_splat_lhs and add *_splat_rhs tests

---
 llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll | 38 +++++++++++++----
 llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll | 57 +++++++++++++++++++------
 2 files changed, 75 insertions(+), 20 deletions(-)

diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
index ef7a64a0227a4..3c03eaebeb06b 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv32.ll
@@ -626,8 +626,8 @@ define <2 x i16> @test_non_const_splat_i16(i16 %elt) {
 }
 
 ; Test add(vec, splat(scalar)) pattern
-define <4 x i8> @test_padd_bs_from_scalar_splat(<4 x i8> %a, i8 %b) {
-; CHECK-LABEL: test_padd_bs_from_scalar_splat:
+define <4 x i8> @test_padd_bs_splat_lhs(<4 x i8> %a, i8 %b) {
+; CHECK-LABEL: test_padd_bs_splat_lhs:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    padd.bs a0, a0, a1
 ; CHECK-NEXT:    ret
@@ -637,8 +637,19 @@ define <4 x i8> @test_padd_bs_from_scalar_splat(<4 x i8> %a, i8 %b) {
   ret <4 x i8> %res
 }
 
-define <2 x i16> @test_padd_hs_from_scalar_splat(<2 x i16> %a, i16 %b) {
-; CHECK-LABEL: test_padd_hs_from_scalar_splat:
+define <4 x i8> @test_padd_bs_splat_rhs(<4 x i8> %a, i8 %b) {
+; CHECK-LABEL: test_padd_bs_splat_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.bs a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <4 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <4 x i8> %insert, <4 x i8> poison, <4 x i32> zeroinitializer
+  %res = add <4 x i8> %a, %splat
+  ret <4 x i8> %res
+}
+
+define <2 x i16> @test_padd_hs_splat_lhs(<2 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_padd_hs_splat_lhs:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    padd.hs a0, a0, a1
 ; CHECK-NEXT:    ret
@@ -648,6 +659,17 @@ define <2 x i16> @test_padd_hs_from_scalar_splat(<2 x i16> %a, i16 %b) {
   ret <2 x i16> %res
 }
 
+define <2 x i16> @test_padd_hs_splat_rhs(<2 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_padd_hs_splat_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.hs a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <2 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <2 x i16> %insert, <2 x i16> poison, <2 x i32> zeroinitializer
+  %res = add <2 x i16> %a, %splat
+  ret <2 x i16> %res
+}
+
 define <4 x i8> @test_build_vector_i8(i8 %a, i8 %c, i8 %b, i8 %d) {
 ; CHECK-RV32-LABEL: test_build_vector_i8:
 ; CHECK-RV32:       # %bb.0:
@@ -1941,10 +1963,10 @@ define <2 x i16> @test_select_v2i16(i1 %cond, <2 x i16> %a, <2 x i16> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB136_2
+; CHECK-NEXT:    bnez a3, .LBB138_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB136_2:
+; CHECK-NEXT:  .LBB138_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <2 x i16> %a, <2 x i16> %b
   ret <2 x i16> %res
@@ -1955,10 +1977,10 @@ define <4 x i8> @test_select_v4i8(i1 %cond, <4 x i8> %a, <4 x i8> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB137_2
+; CHECK-NEXT:    bnez a3, .LBB139_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB137_2:
+; CHECK-NEXT:  .LBB139_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <4 x i8> %a, <4 x i8> %b
   ret <4 x i8> %res
diff --git a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
index a98509093e549..c431d8a2a1197 100644
--- a/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
+++ b/llvm/test/CodeGen/RISCV/rvp-ext-rv64.ll
@@ -864,8 +864,8 @@ define <2 x i32> @test_non_const_splat_i32(i32 %elt) {
 }
 
 ; Test add(vec, splat(scalar)) pattern
-define <8 x i8> @test_padd_bs_from_scalar_splat(<8 x i8> %a, i8 %b) {
-; CHECK-LABEL: test_padd_bs_from_scalar_splat:
+define <8 x i8> @test_padd_bs_splat_lhs(<8 x i8> %a, i8 %b) {
+; CHECK-LABEL: test_padd_bs_splat_lhs:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    padd.bs a0, a0, a1
 ; CHECK-NEXT:    ret
@@ -875,8 +875,19 @@ define <8 x i8> @test_padd_bs_from_scalar_splat(<8 x i8> %a, i8 %b) {
   ret <8 x i8> %res
 }
 
-define <4 x i16> @test_padd_hs_from_scalar_splat(<4 x i16> %a, i16 %b) {
-; CHECK-LABEL: test_padd_hs_from_scalar_splat:
+define <8 x i8> @test_padd_bs_splat_rhs(<8 x i8> %a, i8 %b) {
+; CHECK-LABEL: test_padd_bs_splat_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.bs a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <8 x i8> poison, i8 %b, i32 0
+  %splat = shufflevector <8 x i8> %insert, <8 x i8> poison, <8 x i32> zeroinitializer
+  %res = add <8 x i8> %a, %splat
+  ret <8 x i8> %res
+}
+
+define <4 x i16> @test_padd_hs_splat_lhs(<4 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_padd_hs_splat_lhs:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    padd.hs a0, a0, a1
 ; CHECK-NEXT:    ret
@@ -886,8 +897,19 @@ define <4 x i16> @test_padd_hs_from_scalar_splat(<4 x i16> %a, i16 %b) {
   ret <4 x i16> %res
 }
 
-define <2 x i32> @test_padd_ws_from_scalar_splat(<2 x i32> %a, i32 %b) {
-; CHECK-LABEL: test_padd_ws_from_scalar_splat:
+define <4 x i16> @test_padd_hs_splat_rhs(<4 x i16> %a, i16 %b) {
+; CHECK-LABEL: test_padd_hs_splat_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.hs a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <4 x i16> poison, i16 %b, i32 0
+  %splat = shufflevector <4 x i16> %insert, <4 x i16> poison, <4 x i32> zeroinitializer
+  %res = add <4 x i16> %a, %splat
+  ret <4 x i16> %res
+}
+
+define <2 x i32> @test_padd_ws_splat_lhs(<2 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_padd_ws_splat_lhs:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    padd.ws a0, a0, a1
 ; CHECK-NEXT:    ret
@@ -897,6 +919,17 @@ define <2 x i32> @test_padd_ws_from_scalar_splat(<2 x i32> %a, i32 %b) {
   ret <2 x i32> %res
 }
 
+define <2 x i32> @test_padd_ws_splat_rhs(<2 x i32> %a, i32 %b) {
+; CHECK-LABEL: test_padd_ws_splat_rhs:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    padd.ws a0, a0, a1
+; CHECK-NEXT:    ret
+  %insert = insertelement <2 x i32> poison, i32 %b, i32 0
+  %splat = shufflevector <2 x i32> %insert, <2 x i32> poison, <2 x i32> zeroinitializer
+  %res = add <2 x i32> %a, %splat
+  ret <2 x i32> %res
+}
+
 define <8 x i8> @test_build_vector_i8(i8 %a, i8 %b, i8 %c, i8 %d, i8 %e, i8 %f, i8 %g, i8 %h) {
 ; CHECK-LABEL: test_build_vector_i8:
 ; CHECK:       # %bb.0:
@@ -2474,10 +2507,10 @@ define <4 x i16> @test_select_v4i16(i1 %cond, <4 x i16> %a, <4 x i16> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB199_2
+; CHECK-NEXT:    bnez a3, .LBB202_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB199_2:
+; CHECK-NEXT:  .LBB202_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <4 x i16> %a, <4 x i16> %b
   ret <4 x i16> %res
@@ -2488,10 +2521,10 @@ define <8 x i8> @test_select_v8i8(i1 %cond, <8 x i8> %a, <8 x i8> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB200_2
+; CHECK-NEXT:    bnez a3, .LBB203_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB200_2:
+; CHECK-NEXT:  .LBB203_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <8 x i8> %a, <8 x i8> %b
   ret <8 x i8> %res
@@ -2502,10 +2535,10 @@ define <2 x i32> @test_select_v2i32(i1 %cond, <2 x i32> %a, <2 x i32> %b) {
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    andi a3, a0, 1
 ; CHECK-NEXT:    mv a0, a1
-; CHECK-NEXT:    bnez a3, .LBB201_2
+; CHECK-NEXT:    bnez a3, .LBB204_2
 ; CHECK-NEXT:  # %bb.1:
 ; CHECK-NEXT:    mv a0, a2
-; CHECK-NEXT:  .LBB201_2:
+; CHECK-NEXT:  .LBB204_2:
 ; CHECK-NEXT:    ret
   %res = select i1 %cond, <2 x i32> %a, <2 x i32> %b
   ret <2 x i32> %res

>From 46c4934fe897554898b2f1442e8275fc3cf1ed66 Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 3 Apr 2026 05:03:00 +0000
Subject: [PATCH 3/4] Simplify PADD_*S patterns by removing redundant variants
 and casts

---
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 24 +++---------------------
 1 file changed, 3 insertions(+), 21 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 62e2c158f8011..66a53129f72d3 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1796,25 +1796,13 @@ let Predicates = [HasStdExtP] in {
   // Basic 8-bit arithmetic patterns
   def : PatGprGpr<add, PADD_B, XLenVecI8VT>;
   def : PatGprGpr<sub, PSUB_B, XLenVecI8VT>;
-  def : Pat<(XLenVecI8VT
-              (add (XLenVecI8VT GPR:$rs1),
-                   (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))))),
-            (PADD_BS GPR:$rs1, GPR:$rs2)>;
-  def : Pat<(XLenVecI8VT
-              (add (XLenVecI8VT (splat_vector (XLenVT GPR:$rs2))),
-                   (XLenVecI8VT GPR:$rs1))),
+  def : Pat<(XLenVecI8VT (add GPR:$rs1, (splat_vector (XLenVT GPR:$rs2)))),
             (PADD_BS GPR:$rs1, GPR:$rs2)>;
 
   // Basic 16-bit arithmetic patterns
   def : PatGprGpr<add, PADD_H, XLenVecI16VT>;
   def : PatGprGpr<sub, PSUB_H, XLenVecI16VT>;
-  def : Pat<(XLenVecI16VT
-              (add (XLenVecI16VT GPR:$rs1),
-                   (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))))),
-            (PADD_HS GPR:$rs1, GPR:$rs2)>;
-  def : Pat<(XLenVecI16VT
-              (add (XLenVecI16VT (splat_vector (XLenVT GPR:$rs2))),
-                   (XLenVecI16VT GPR:$rs1))),
+  def : Pat<(XLenVecI16VT (add GPR:$rs1, (splat_vector (XLenVT GPR:$rs2)))),
             (PADD_HS GPR:$rs1, GPR:$rs2)>;
 
   // 8-bit bitwise operation patterns
@@ -2030,13 +2018,7 @@ let Predicates = [HasStdExtP, IsRV64] in {
   // Basic 32-bit arithmetic patterns
   def : PatGprGpr<add, PADD_W, v2i32>;
   def : PatGprGpr<sub, PSUB_W, v2i32>;
-  def : Pat<(v2i32
-              (add (v2i32 GPR:$rs1),
-                   (v2i32 (splat_vector (XLenVT GPR:$rs2))))),
-            (PADD_WS GPR:$rs1, GPR:$rs2)>;
-  def : Pat<(v2i32
-              (add (v2i32 (splat_vector (XLenVT GPR:$rs2))),
-                   (v2i32 GPR:$rs1))),
+  def : Pat<(v2i32 (add GPR:$rs1, (splat_vector (XLenVT GPR:$rs2)))),
             (PADD_WS GPR:$rs1, GPR:$rs2)>;
 
   // 32-bit bitwise operation patterns

>From d4c53885d095a9487c8fe7df947769ec1bce703a Mon Sep 17 00:00:00 2001
From: SiHuaN <liyongtai at iscas.ac.cn>
Date: Fri, 3 Apr 2026 05:21:28 +0000
Subject: [PATCH 4/4] Factor out splat-based patterns into a helper class

---
 llvm/lib/Target/RISCV/RISCVInstrInfo.td  | 5 +++++
 llvm/lib/Target/RISCV/RISCVInstrInfoP.td | 9 +++------
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfo.td b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
index 8aea2f91468c1..ef56275118f2e 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfo.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfo.td
@@ -1375,6 +1375,11 @@ class PatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
 class PatGprGpr<SDPatternOperator OpNode, RVInst Inst, ValueType vt = XLenVT>
     : Pat<(vt (OpNode (vt GPR:$rs1), (vt GPR:$rs2))), (Inst GPR:$rs1, GPR:$rs2)>;
 
+class PatGprSplatGpr<SDPatternOperator OpNode, RVInst Inst, ValueType VecVT,
+                     ValueType ScalarVT = XLenVT>
+    : Pat<(VecVT (OpNode GPR:$rs1, (splat_vector (ScalarVT GPR:$rs2)))),
+          (Inst GPR:$rs1, GPR:$rs2)>;
+
 class PatGprImm<SDPatternOperator OpNode, RVInst Inst, SDPatternOperator ImmType,
                 ValueType vt = XLenVT>
     : Pat<(vt (OpNode (vt GPR:$rs1), ImmType:$imm)),
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
index 66a53129f72d3..426fc332debc5 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoP.td
@@ -1796,14 +1796,12 @@ let Predicates = [HasStdExtP] in {
   // Basic 8-bit arithmetic patterns
   def : PatGprGpr<add, PADD_B, XLenVecI8VT>;
   def : PatGprGpr<sub, PSUB_B, XLenVecI8VT>;
-  def : Pat<(XLenVecI8VT (add GPR:$rs1, (splat_vector (XLenVT GPR:$rs2)))),
-            (PADD_BS GPR:$rs1, GPR:$rs2)>;
+  def : PatGprSplatGpr<add, PADD_BS, XLenVecI8VT>;
 
   // Basic 16-bit arithmetic patterns
   def : PatGprGpr<add, PADD_H, XLenVecI16VT>;
   def : PatGprGpr<sub, PSUB_H, XLenVecI16VT>;
-  def : Pat<(XLenVecI16VT (add GPR:$rs1, (splat_vector (XLenVT GPR:$rs2)))),
-            (PADD_HS GPR:$rs1, GPR:$rs2)>;
+  def : PatGprSplatGpr<add, PADD_HS, XLenVecI16VT>;
 
   // 8-bit bitwise operation patterns
   def : PatGprGpr<and, AND, XLenVecI8VT>;
@@ -2018,8 +2016,7 @@ let Predicates = [HasStdExtP, IsRV64] in {
   // Basic 32-bit arithmetic patterns
   def : PatGprGpr<add, PADD_W, v2i32>;
   def : PatGprGpr<sub, PSUB_W, v2i32>;
-  def : Pat<(v2i32 (add GPR:$rs1, (splat_vector (XLenVT GPR:$rs2)))),
-            (PADD_WS GPR:$rs1, GPR:$rs2)>;
+  def : PatGprSplatGpr<add, PADD_WS, v2i32>;
 
   // 32-bit bitwise operation patterns
   def : PatGprGpr<and, AND, v2i32>;