[llvm-branch-commits] [llvm] [LoongArch] Select `V{AND, OR, XOR, NOR}I.B` for bitwise with byte splat immediates (PR #192217)

Wed Apr 15 02:14:05 PDT 2026

https://github.com/heiher created https://github.com/llvm/llvm-project/pull/192217

The `V{AND,OR,XOR,NOR}I.B` instructions operate on byte elements and accept an 8-bit immediate. However, when the same byte splat constant is used with wider vector element types (e.g. v8i16, v4i32, v2i64), instruction selection currently falls back to materializing the constant in a temporary register and using the register form:

```
vrepli.b  -1
vxor.v
```

even though the immediate form is available:

```
vxori.b 255
```

This happens because `selectVSplatImm` requires the splat bit width to match the vector element size, which prevents byte splat immediates from being matched for non-i8 vector types.

Generalize selectVSplatImm to optionally accept an explicit element bit width and introduce a new vsplat_i8_uimm8 pattern to detect byte splat immediates independently of the vector element type. Use this pattern to extend the V*I.B instruction patterns to wider vector element types.

This enables direct selection of byte-immediate bitwise instructions, reduces instruction count, and avoids unnecessary temporary registers in common mask operations.

From 49d7237290ad724fec65ce168a5e9fe99ece7943 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Tue, 14 Apr 2026 22:23:10 +0800
Subject: [PATCH] [LoongArch] Select `V{AND,OR,XOR,NOR}I.B` for bitwise with
 byte splat immediates

The `V{AND,OR,XOR,NOR}I.B` instructions operate on byte elements and accept
an 8-bit immediate. However, when the same byte splat constant is used with
wider vector element types (e.g. v8i16, v4i32, v2i64), instruction selection
currently falls back to materializing the constant in a temporary register:

```
vrepli.b  -1
vxor.v
```

even though the immediate form is available:

```
vxori.b 255
```

This happens because selectVSplatImm requires the splat bit width to match
the vector element size, which prevents byte splat immediates from being
matched for non-i8 vector types.

Generalize selectVSplatImm to optionally accept an explicit element bit
width and introduce a new vsplat_i8_uimm8 pattern to detect byte splat
immediates independently of the vector element type. Use this pattern to
extend the V*I.B instruction patterns to wider vector element types.

This enables direct selection of byte-immediate bitwise instructions,
reduces instruction count, and avoids unnecessary temporary registers in
common mask operations.
---
 .../AsmParser/LoongArchAsmParser.cpp          |  1 +
 .../LoongArch/LoongArchISelDAGToDAG.cpp       |  7 +++--
 .../Target/LoongArch/LoongArchISelDAGToDAG.h  |  2 +-
 .../Target/LoongArch/LoongArchInstrInfo.td    |  5 ++-
 .../LoongArch/LoongArchLASXInstrInfo.td       | 24 ++++++++++++++
 .../Target/LoongArch/LoongArchLSXInstrInfo.td | 31 +++++++++++++++++--
 .../test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll |  9 ++----
 .../LoongArch/lasx/ir-instruction/and.ll      |  9 ++----
 .../LoongArch/lasx/ir-instruction/icmp.ll     |  9 ++----
 .../LoongArch/lasx/ir-instruction/nor.ll      |  9 ++----
 .../LoongArch/lasx/ir-instruction/or.ll       |  9 ++----
 .../LoongArch/lasx/ir-instruction/xor.ll      |  9 ++----
 llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll |  3 +-
 llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll |  9 ++----
 .../LoongArch/lsx/ir-instruction/and.ll       |  9 ++----
 .../LoongArch/lsx/ir-instruction/icmp.ll      |  9 ++----
 .../LoongArch/lsx/ir-instruction/nor.ll       |  9 ++----
 .../LoongArch/lsx/ir-instruction/or.ll        |  9 ++----
 .../LoongArch/lsx/ir-instruction/xor.ll       |  9 ++----
 llvm/test/CodeGen/LoongArch/pr177863.ll       |  6 ++--
 20 files changed, 102 insertions(+), 85 deletions(-)

diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index c8c3a2bc9ce6d..04b8a75623925 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -552,6 +552,7 @@ class LoongArchOperand : public MCParsedAsmOperand {
                      IsValidKind;
   }
 
+  bool isImm16() const { return isSImm<16>() || isUImm<16>(); }
   bool isImm32() const { return isSImm<32>() || isUImm<32>(); }
   bool isImm64() const {
     if (!isImm())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 442f0a46a4983..265b86a4c469e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -400,16 +400,17 @@ bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm,
   return true;
 }
 
-template <unsigned ImmBitSize, bool IsSigned>
+template <unsigned ImmBitSize, unsigned EltBitSize, bool IsSigned>
 bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) {
   APInt ImmValue;
   EVT EltTy = N->getValueType(0).getVectorElementType();
+  unsigned EltBitWidth = EltBitSize ? EltBitSize : EltTy.getSizeInBits();
 
   if (N->getOpcode() == ISD::BITCAST)
     N = N->getOperand(0);
 
-  if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
-      ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+  if (selectVSplat(N.getNode(), ImmValue, EltBitWidth) &&
+      ImmValue.getBitWidth() == EltBitWidth) {
     if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) {
       SplatVal = CurDAG->getSignedTargetConstant(
           ImmValue.getSExtValue(), SDLoc(N), Subtarget->getGRLenVT());
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 4c8dcb8fa48af..60dcfd9dac62a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -60,7 +60,7 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
 
   bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const;
 
-  template <unsigned ImmSize, bool IsSigned = false>
+  template <unsigned ImmSize, unsigned EltSize = 0, bool IsSigned = false>
   bool selectVSplatImm(SDValue N, SDValue &SplatVal);
 
   bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 555dc31e77755..f51ff39714267 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -279,8 +279,11 @@ def GPRMemAtomic : RegisterOperand<GPR> {
   let PrintMethod = "printAtomicMemOp";
 }
 
-// A parameterized register class alternative to i32imm/i64imm from Target.td.
+// A parameterized register class alternative to i16imm/i32imm/i64imm from Target.td.
 def grlenimm : Operand<GRLenVT>;
+def imm16 : Operand<GRLenVT> {
+  let ParserMatchClass = ImmAsmOperand<"", 16, "">;
+}
 def imm32 : Operand<GRLenVT> {
   let ParserMatchClass = ImmAsmOperand<"", 32, "">;
 }
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 1b18d16a4cabb..0b82ed114d193 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1412,15 +1412,39 @@ def : Pat<(or (vt LASX256:$xj), (vt (vnot LASX256:$xk))),
 // XVANDI_B
 def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
           (XVANDI_B LASX256:$xj, uimm8:$imm)>;
+def : Pat<(and (v16i16 LASX256:$xj), (v16i16 (vsplat_i8_uimm8 imm16:$imm))),
+          (XVANDI_B LASX256:$xj, imm16:$imm)>;
+def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_i8_uimm8 imm32:$imm))),
+          (XVANDI_B LASX256:$xj, imm32:$imm)>;
+def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_i8_uimm8 imm64:$imm))),
+          (XVANDI_B LASX256:$xj, imm64:$imm)>;
 // XVORI_B
 def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
           (XVORI_B LASX256:$xj, uimm8:$imm)>;
+def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_i8_uimm8 imm16:$imm))),
+          (XVORI_B LASX256:$xj, imm16:$imm)>;
+def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_i8_uimm8 imm32:$imm))),
+          (XVORI_B LASX256:$xj, imm32:$imm)>;
+def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_i8_uimm8 imm64:$imm))),
+          (XVORI_B LASX256:$xj, imm64:$imm)>;
 // XVXORI_B
 def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
           (XVXORI_B LASX256:$xj, uimm8:$imm)>;
+def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_i8_uimm8 imm16:$imm))),
+          (XVXORI_B LASX256:$xj, imm16:$imm)>;
+def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_i8_uimm8 imm32:$imm))),
+          (XVXORI_B LASX256:$xj, imm32:$imm)>;
+def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_i8_uimm8 imm64:$imm))),
+          (XVXORI_B LASX256:$xj, imm64:$imm)>;
 // XVNORI_B
 def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm)))),
           (XVNORI_B LASX256:$xj, uimm8:$imm)>;
+def : Pat<(vnot (or (v16i16 LASX256:$xj), (v16i16 (vsplat_i8_uimm8 imm16:$imm)))),
+          (XVNORI_B LASX256:$xj, imm16:$imm)>;
+def : Pat<(vnot (or (v8i32 LASX256:$xj), (v8i32 (vsplat_i8_uimm8 imm32:$imm)))),
+          (XVNORI_B LASX256:$xj, imm32:$imm)>;
+def : Pat<(vnot (or (v4i64 LASX256:$xj), (v4i64 (vsplat_i8_uimm8 imm64:$imm)))),
+          (XVNORI_B LASX256:$xj, imm64:$imm)>;
 
 // XVBSLL_V
 foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index dbc0d68f2254f..f7927cb64c13b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -196,13 +196,16 @@ def vsplati64imm63 : PatFrag<(ops node:$reg),
                              (and node:$reg, vsplati64_imm_eq_63)>;
 
 foreach N = [3, 4, 5, 6, 8] in
-  def SplatPat_uimm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#">",
+  def SplatPat_uimm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", 0>",
                                        [build_vector, bitconvert], [], 2>;
 
 foreach N = [5] in
-  def SplatPat_simm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", true>",
+  def SplatPat_simm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", 0, true>",
                                        [build_vector, bitconvert]>;
 
+def vsplat_i8_uimm8 : ComplexPattern<vAny, 1, "selectVSplatImm<8, 8>",
+                                     [build_vector, bitconvert]>;
+
 def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
                                           [build_vector, bitconvert]>;
 
@@ -1622,15 +1625,39 @@ def : Pat<(or (vt LSX128:$vj), (vt (vnot LSX128:$vk))),
 // VANDI_B
 def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
           (VANDI_B LSX128:$vj, uimm8:$imm)>;
+def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_i8_uimm8 imm16:$imm))),
+          (VANDI_B LSX128:$vj, imm16:$imm)>;
+def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_i8_uimm8 imm32:$imm))),
+          (VANDI_B LSX128:$vj, imm32:$imm)>;
+def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_i8_uimm8 imm64:$imm))),
+          (VANDI_B LSX128:$vj, imm64:$imm)>;
 // VORI_B
 def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
           (VORI_B LSX128:$vj, uimm8:$imm)>;
+def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_i8_uimm8 imm16:$imm))),
+          (VORI_B LSX128:$vj, imm16:$imm)>;
+def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_i8_uimm8 imm32:$imm))),
+          (VORI_B LSX128:$vj, imm32:$imm)>;
+def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_i8_uimm8 imm64:$imm))),
+          (VORI_B LSX128:$vj, imm64:$imm)>;
 // VXORI_B
 def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
           (VXORI_B LSX128:$vj, uimm8:$imm)>;
+def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_i8_uimm8 imm16:$imm))),
+          (VXORI_B LSX128:$vj, imm16:$imm)>;
+def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_i8_uimm8 imm32:$imm))),
+          (VXORI_B LSX128:$vj, imm32:$imm)>;
+def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_i8_uimm8 imm64:$imm))),
+          (VXORI_B LSX128:$vj, imm64:$imm)>;
 // VNORI_B
 def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm)))),
           (VNORI_B LSX128:$vj, uimm8:$imm)>;
+def : Pat<(vnot (or (v8i16 LSX128:$vj), (v8i16 (vsplat_i8_uimm8 imm16:$imm)))),
+          (VNORI_B LSX128:$vj, imm16:$imm)>;
+def : Pat<(vnot (or (v4i32 LSX128:$vj), (v4i32 (vsplat_i8_uimm8 imm32:$imm)))),
+          (VNORI_B LSX128:$vj, imm32:$imm)>;
+def : Pat<(vnot (or (v2i64 LSX128:$vj), (v2i64 (vsplat_i8_uimm8 imm64:$imm)))),
+          (VNORI_B LSX128:$vj, imm64:$imm)>;
 
 // VBSLL_V
 foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
index b3155c9313a8a..9b40d400c9970 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
@@ -125,8 +125,7 @@ define void @not_ctlz_v16i16(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: not_ctlz_v16i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvrepli.b $xr1, -1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
 ; CHECK-NEXT:    xvclz.h $xr0, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a1, 0
 ; CHECK-NEXT:    ret
@@ -141,8 +140,7 @@ define void @not_ctlz_v8i32(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: not_ctlz_v8i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvrepli.b $xr1, -1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
 ; CHECK-NEXT:    xvclz.w $xr0, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a1, 0
 ; CHECK-NEXT:    ret
@@ -157,8 +155,7 @@ define void @not_ctlz_v4i64(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: not_ctlz_v4i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    xvld $xr0, $a0, 0
-; CHECK-NEXT:    xvrepli.b $xr1, -1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
 ; CHECK-NEXT:    xvclz.d $xr0, $xr0
 ; CHECK-NEXT:    xvst $xr0, $a1, 0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
index 91f50f28464d2..7bd56e427c1cb 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
@@ -128,8 +128,7 @@ entry:
 define <16 x i16> @and_u_v16i16_1(<16 x i16> %a) nounwind {
 ; CHECK-LABEL: and_u_v16i16_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvandi.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = and <16 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
 define <8 x i32> @and_u_v8i32_1(<8 x i32> %a) nounwind {
 ; CHECK-LABEL: and_u_v8i32_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvandi.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = and <8 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
 define <4 x i64> @and_u_v4i64_1(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: and_u_v4i64_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvandi.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = and <4 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
index 47229fc9a0fc4..4ea54abacc6f6 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
@@ -631,8 +631,7 @@ define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
 ; CHECK-NEXT:    xvseq.h $xr0, $xr0, $xr1
-; CHECK-NEXT:    xvrepli.b $xr1, -1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <16 x i16>, ptr %a0
@@ -649,8 +648,7 @@ define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
 ; CHECK-NEXT:    xvseq.w $xr0, $xr0, $xr1
-; CHECK-NEXT:    xvrepli.b $xr1, -1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i32>, ptr %a0
@@ -667,8 +665,7 @@ define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-NEXT:    xvld $xr0, $a1, 0
 ; CHECK-NEXT:    xvld $xr1, $a2, 0
 ; CHECK-NEXT:    xvseq.d $xr0, $xr0, $xr1
-; CHECK-NEXT:    xvrepli.b $xr1, -1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 255
 ; CHECK-NEXT:    xvst $xr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i64>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/nor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/nor.ll
index 691b98b0b1646..0aa3fde735e5c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/nor.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/nor.ll
@@ -136,8 +136,7 @@ entry:
 define <16 x i16> @nor_u_v16i16_1(<16 x i16> %a) nounwind {
 ; CHECK-LABEL: nor_u_v16i16_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvnori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <16 x i16> %a, splat (i16 257)
@@ -148,8 +147,7 @@ entry:
 define <8 x i32> @nor_u_v8i32_1(<8 x i32> %a) nounwind {
 ; CHECK-LABEL: nor_u_v8i32_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvnori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <8 x i32> %a, splat (i32 16843009)
@@ -160,8 +158,7 @@ entry:
 define <4 x i64> @nor_u_v4i64_1(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: nor_u_v4i64_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvnori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <4 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
index a6f5e473d643d..174a0f1562bed 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
@@ -128,8 +128,7 @@ entry:
 define <16 x i16> @or_u_v16i16_1(<16 x i16> %a) nounwind {
 ; CHECK-LABEL: or_u_v16i16_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <16 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
 define <8 x i32> @or_u_v8i32_1(<8 x i32> %a) nounwind {
 ; CHECK-LABEL: or_u_v8i32_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <8 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
 define <4 x i64> @or_u_v4i64_1(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: or_u_v4i64_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <4 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
index a518148fe696c..3f4c65590971d 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
@@ -128,8 +128,7 @@ entry:
 define <16 x i16> @xor_u_v16i16_1(<16 x i16> %a) nounwind {
 ; CHECK-LABEL: xor_u_v16i16_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = xor <16 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
 define <8 x i32> @xor_u_v8i32_1(<8 x i32> %a) nounwind {
 ; CHECK-LABEL: xor_u_v8i32_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = xor <8 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
 define <4 x i64> @xor_u_v4i64_1(<4 x i64> %a) nounwind {
 ; CHECK-LABEL: xor_u_v4i64_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    xvrepli.b $xr1, 1
-; CHECK-NEXT:    xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT:    xvxori.b $xr0, $xr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = xor <4 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
index 09908f619fa1f..ca61e69f1b378 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
@@ -863,8 +863,7 @@ define i8 @xvmsk_ne_v4i32_concat_poison(<4 x i32> %vec) {
 ; CHECK-LABEL: xvmsk_ne_v4i32_concat_poison:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vseqi.w $vr0, $vr0, 0
-; CHECK-NEXT:    vrepli.b $vr1, -1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 0
 ; CHECK-NEXT:    vinsgr2vr.h $vr1, $a0, 0
 ; CHECK-NEXT:    vpickve2gr.w $a0, $vr0, 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
index 6ac7d51de253b..fe49a4c9e9cab 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
@@ -125,8 +125,7 @@ define void @not_ctlz_v8i16(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: not_ctlz_v8i16:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, -1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vclz.h $vr0, $vr0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
@@ -141,8 +140,7 @@ define void @not_ctlz_v4i32(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: not_ctlz_v4i32:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, -1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vclz.w $vr0, $vr0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
@@ -157,8 +155,7 @@ define void @not_ctlz_v2i64(ptr %src, ptr %dst) nounwind {
 ; CHECK-LABEL: not_ctlz_v2i64:
 ; CHECK:       # %bb.0:
 ; CHECK-NEXT:    vld $vr0, $a0, 0
-; CHECK-NEXT:    vrepli.b $vr1, -1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vclz.d $vr0, $vr0
 ; CHECK-NEXT:    vst $vr0, $a1, 0
 ; CHECK-NEXT:    ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
index f90ae0d34cb8c..1e20b31b7c453 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
@@ -128,8 +128,7 @@ entry:
 define <8 x i16> @and_u_v8i16_1(<8 x i16> %a) nounwind {
 ; CHECK-LABEL: and_u_v8i16_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vandi.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = and <8 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
 define <4 x i32> @and_u_v4i32_1(<4 x i32> %a) nounwind {
 ; CHECK-LABEL: and_u_v4i32_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vandi.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = and <4 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
 define <2 x i64> @and_u_v2i64_1(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: and_u_v2i64_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vandi.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = and <2 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
index 7166469bf5cee..859d604549454 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
@@ -631,8 +631,7 @@ define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vld $vr1, $a2, 0
 ; CHECK-NEXT:    vseq.h $vr0, $vr0, $vr1
-; CHECK-NEXT:    vrepli.b $vr1, -1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <8 x i16>, ptr %a0
@@ -649,8 +648,7 @@ define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vld $vr1, $a2, 0
 ; CHECK-NEXT:    vseq.w $vr0, $vr0, $vr1
-; CHECK-NEXT:    vrepli.b $vr1, -1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <4 x i32>, ptr %a0
@@ -667,8 +665,7 @@ define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
 ; CHECK-NEXT:    vld $vr0, $a1, 0
 ; CHECK-NEXT:    vld $vr1, $a2, 0
 ; CHECK-NEXT:    vseq.d $vr0, $vr0, $vr1
-; CHECK-NEXT:    vrepli.b $vr1, -1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 255
 ; CHECK-NEXT:    vst $vr0, $a0, 0
 ; CHECK-NEXT:    ret
   %v0 = load <2 x i64>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/nor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/nor.ll
index a23899c81ca04..f297262010951 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/nor.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/nor.ll
@@ -136,8 +136,7 @@ entry:
 define <8 x i16> @nor_u_v8i16_1(<8 x i16> %a) nounwind {
 ; CHECK-LABEL: nor_u_v8i16_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vnori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <8 x i16> %a, splat (i16 257)
@@ -148,8 +147,7 @@ entry:
 define <4 x i32> @nor_u_v4i32_1(<4 x i32> %a) nounwind {
 ; CHECK-LABEL: nor_u_v4i32_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vnori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <4 x i32> %a, splat (i32 16843009)
@@ -160,8 +158,7 @@ entry:
 define <2 x i64> @nor_u_v2i64_1(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: nor_u_v2i64_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vnori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <2 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
index 7188783b0ce56..3bc845bd8c416 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
@@ -128,8 +128,7 @@ entry:
 define <8 x i16> @or_u_v8i16_1(<8 x i16> %a) nounwind {
 ; CHECK-LABEL: or_u_v8i16_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <8 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
 define <4 x i32> @or_u_v4i32_1(<4 x i32> %a) nounwind {
 ; CHECK-LABEL: or_u_v4i32_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <4 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
 define <2 x i64> @or_u_v2i64_1(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: or_u_v2i64_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = or <2 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
index e7298fc28273e..e4a087218995d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
@@ -128,8 +128,7 @@ entry:
 define <8 x i16> @xor_u_v8i16_1(<8 x i16> %a) nounwind {
 ; CHECK-LABEL: xor_u_v8i16_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = xor <8 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
 define <4 x i32> @xor_u_v4i32_1(<4 x i32> %a) nounwind {
 ; CHECK-LABEL: xor_u_v4i32_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = xor <4 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
 define <2 x i64> @xor_u_v2i64_1(<2 x i64> %a) nounwind {
 ; CHECK-LABEL: xor_u_v2i64_1:
 ; CHECK:       # %bb.0: # %entry
-; CHECK-NEXT:    vrepli.b $vr1, 1
-; CHECK-NEXT:    vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT:    vxori.b $vr0, $vr0, 1
 ; CHECK-NEXT:    ret
 entry:
   %0 = xor <2 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/pr177863.ll b/llvm/test/CodeGen/LoongArch/pr177863.ll
index 8edbd33ace133..54fa02162ee0d 100644
--- a/llvm/test/CodeGen/LoongArch/pr177863.ll
+++ b/llvm/test/CodeGen/LoongArch/pr177863.ll
@@ -15,8 +15,7 @@ define <4 x i1> @test(<4 x i64> %shuffle2, <4 x i64> %shuffle4) {
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 2
 ; LA32-NEXT:    xvpickve2gr.w $a0, $xr0, 6
 ; LA32-NEXT:    vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT:    vrepli.b $vr0, -1
-; LA32-NEXT:    vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT:    vxori.b $vr0, $vr1, 255
 ; LA32-NEXT:    ret
 ;
 ; LA64-LABEL: test:
@@ -30,8 +29,7 @@ define <4 x i1> @test(<4 x i64> %shuffle2, <4 x i64> %shuffle4) {
 ; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 2
 ; LA64-NEXT:    xvpickve2gr.d $a0, $xr0, 3
 ; LA64-NEXT:    vinsgr2vr.w $vr1, $a0, 3
-; LA64-NEXT:    vrepli.b $vr0, -1
-; LA64-NEXT:    vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT:    vxori.b $vr0, $vr1, 255
 ; LA64-NEXT:    ret
 entry:
   %conv5 = trunc nuw <4 x i64> %shuffle4 to <4 x i32>