[llvm-branch-commits] [llvm] [LoongArch] Select `V{AND, OR, XOR, NOR}I.B` for bitwise with byte splat immediates (PR #192217)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Wed Apr 15 02:14:05 PDT 2026
https://github.com/heiher created https://github.com/llvm/llvm-project/pull/192217
The `V{AND,OR,XOR,NOR}I.B` instructions operate on byte elements and accept an 8-bit immediate. However, when the same byte splat constant is used with wider vector element types (e.g. v8i16, v4i32, v2i64), instruction selection currently falls back to materializing the constant in a temporary register and using the register-register form:
```
vrepli.b -1
vxor.v
```
even though the immediate form is available:
```
vxori.b 255
```
This happens because selectVSplatImm requires the splat bit width to match the vector element size, which prevents byte splat immediates from being matched for non-i8 vector types.
Generalize selectVSplatImm to optionally accept an explicit element bit width and introduce a new vsplat_i8_uimm8 pattern to detect byte splat immediates independently of the vector element type. Use this pattern to extend the V*I.B instruction patterns to wider vector element types.
This enables direct selection of byte-immediate bitwise instructions, reduces instruction count, and avoids unnecessary temporary registers in common mask operations.
From 49d7237290ad724fec65ce168a5e9fe99ece7943 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Tue, 14 Apr 2026 22:23:10 +0800
Subject: [PATCH] [LoongArch] Select `V{AND,OR,XOR,NOR}I.B` for bitwise with
byte splat immediates
The `V{AND,OR,XOR,NOR}I.B` instructions operate on byte elements and accept
an 8-bit immediate. However, when the same byte splat constant is used with
wider vector element types (e.g. v8i16, v4i32, v2i64), instruction selection
currently falls back to materializing the constant in a temporary register:
```
vrepli.b -1
vxor.v
```
even though the immediate form is available:
```
vxori.b 255
```
This happens because selectVSplatImm requires the splat bit width to match
the vector element size, which prevents byte splat immediates from being
matched for non-i8 vector types.
Generalize selectVSplatImm to optionally accept an explicit element bit
width and introduce a new vsplat_i8_uimm8 pattern to detect byte splat
immediates independently of the vector element type. Use this pattern to
extend the V*I.B instruction patterns to wider vector element types.
This enables direct selection of byte-immediate bitwise instructions,
reduces instruction count, and avoids unnecessary temporary registers in
common mask operations.
---
.../AsmParser/LoongArchAsmParser.cpp | 1 +
.../LoongArch/LoongArchISelDAGToDAG.cpp | 7 +++--
.../Target/LoongArch/LoongArchISelDAGToDAG.h | 2 +-
.../Target/LoongArch/LoongArchInstrInfo.td | 5 ++-
.../LoongArch/LoongArchLASXInstrInfo.td | 24 ++++++++++++++
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 31 +++++++++++++++++--
.../test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll | 9 ++----
.../LoongArch/lasx/ir-instruction/and.ll | 9 ++----
.../LoongArch/lasx/ir-instruction/icmp.ll | 9 ++----
.../LoongArch/lasx/ir-instruction/nor.ll | 9 ++----
.../LoongArch/lasx/ir-instruction/or.ll | 9 ++----
.../LoongArch/lasx/ir-instruction/xor.ll | 9 ++----
llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll | 3 +-
llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll | 9 ++----
.../LoongArch/lsx/ir-instruction/and.ll | 9 ++----
.../LoongArch/lsx/ir-instruction/icmp.ll | 9 ++----
.../LoongArch/lsx/ir-instruction/nor.ll | 9 ++----
.../LoongArch/lsx/ir-instruction/or.ll | 9 ++----
.../LoongArch/lsx/ir-instruction/xor.ll | 9 ++----
llvm/test/CodeGen/LoongArch/pr177863.ll | 6 ++--
20 files changed, 102 insertions(+), 85 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
index c8c3a2bc9ce6d..04b8a75623925 100644
--- a/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
+++ b/llvm/lib/Target/LoongArch/AsmParser/LoongArchAsmParser.cpp
@@ -552,6 +552,7 @@ class LoongArchOperand : public MCParsedAsmOperand {
IsValidKind;
}
+ bool isImm16() const { return isSImm<16>() || isUImm<16>(); }
bool isImm32() const { return isSImm<32>() || isUImm<32>(); }
bool isImm64() const {
if (!isImm())
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index 442f0a46a4983..265b86a4c469e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -400,16 +400,17 @@ bool LoongArchDAGToDAGISel::selectVSplat(SDNode *N, APInt &Imm,
return true;
}
-template <unsigned ImmBitSize, bool IsSigned>
+template <unsigned ImmBitSize, unsigned EltBitSize, bool IsSigned>
bool LoongArchDAGToDAGISel::selectVSplatImm(SDValue N, SDValue &SplatVal) {
APInt ImmValue;
EVT EltTy = N->getValueType(0).getVectorElementType();
+ unsigned EltBitWidth = EltBitSize ? EltBitSize : EltTy.getSizeInBits();
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0);
- if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
- ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ if (selectVSplat(N.getNode(), ImmValue, EltBitWidth) &&
+ ImmValue.getBitWidth() == EltBitWidth) {
if (IsSigned && ImmValue.isSignedIntN(ImmBitSize)) {
SplatVal = CurDAG->getSignedTargetConstant(
ImmValue.getSExtValue(), SDLoc(N), Subtarget->getGRLenVT());
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 4c8dcb8fa48af..60dcfd9dac62a 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -60,7 +60,7 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
bool selectVSplat(SDNode *N, APInt &Imm, unsigned MinSizeInBits) const;
- template <unsigned ImmSize, bool IsSigned = false>
+ template <unsigned ImmSize, unsigned EltSize = 0, bool IsSigned = false>
bool selectVSplatImm(SDValue N, SDValue &SplatVal);
bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 555dc31e77755..f51ff39714267 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -279,8 +279,11 @@ def GPRMemAtomic : RegisterOperand<GPR> {
let PrintMethod = "printAtomicMemOp";
}
-// A parameterized register class alternative to i32imm/i64imm from Target.td.
+// A parameterized register class alternative to i16imm/i32imm/i64imm from Target.td.
def grlenimm : Operand<GRLenVT>;
+def imm16 : Operand<GRLenVT> {
+ let ParserMatchClass = ImmAsmOperand<"", 16, "">;
+}
def imm32 : Operand<GRLenVT> {
let ParserMatchClass = ImmAsmOperand<"", 32, "">;
}
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 1b18d16a4cabb..0b82ed114d193 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1412,15 +1412,39 @@ def : Pat<(or (vt LASX256:$xj), (vt (vnot LASX256:$xk))),
// XVANDI_B
def : Pat<(and (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
(XVANDI_B LASX256:$xj, uimm8:$imm)>;
+def : Pat<(and (v16i16 LASX256:$xj), (v16i16 (vsplat_i8_uimm8 imm16:$imm))),
+ (XVANDI_B LASX256:$xj, imm16:$imm)>;
+def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_i8_uimm8 imm32:$imm))),
+ (XVANDI_B LASX256:$xj, imm32:$imm)>;
+def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_i8_uimm8 imm64:$imm))),
+ (XVANDI_B LASX256:$xj, imm64:$imm)>;
// XVORI_B
def : Pat<(or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
(XVORI_B LASX256:$xj, uimm8:$imm)>;
+def : Pat<(or (v16i16 LASX256:$xj), (v16i16 (vsplat_i8_uimm8 imm16:$imm))),
+ (XVORI_B LASX256:$xj, imm16:$imm)>;
+def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_i8_uimm8 imm32:$imm))),
+ (XVORI_B LASX256:$xj, imm32:$imm)>;
+def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_i8_uimm8 imm64:$imm))),
+ (XVORI_B LASX256:$xj, imm64:$imm)>;
// XVXORI_B
def : Pat<(xor (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm))),
(XVXORI_B LASX256:$xj, uimm8:$imm)>;
+def : Pat<(xor (v16i16 LASX256:$xj), (v16i16 (vsplat_i8_uimm8 imm16:$imm))),
+ (XVXORI_B LASX256:$xj, imm16:$imm)>;
+def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_i8_uimm8 imm32:$imm))),
+ (XVXORI_B LASX256:$xj, imm32:$imm)>;
+def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_i8_uimm8 imm64:$imm))),
+ (XVXORI_B LASX256:$xj, imm64:$imm)>;
// XVNORI_B
def : Pat<(vnot (or (v32i8 LASX256:$xj), (v32i8 (SplatPat_uimm8 uimm8:$imm)))),
(XVNORI_B LASX256:$xj, uimm8:$imm)>;
+def : Pat<(vnot (or (v16i16 LASX256:$xj), (v16i16 (vsplat_i8_uimm8 imm16:$imm)))),
+ (XVNORI_B LASX256:$xj, imm16:$imm)>;
+def : Pat<(vnot (or (v8i32 LASX256:$xj), (v8i32 (vsplat_i8_uimm8 imm32:$imm)))),
+ (XVNORI_B LASX256:$xj, imm32:$imm)>;
+def : Pat<(vnot (or (v4i64 LASX256:$xj), (v4i64 (vsplat_i8_uimm8 imm64:$imm)))),
+ (XVNORI_B LASX256:$xj, imm64:$imm)>;
// XVBSLL_V
foreach vt = [v32i8, v16i16, v8i32, v4i64, v8f32,
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index dbc0d68f2254f..f7927cb64c13b 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -196,13 +196,16 @@ def vsplati64imm63 : PatFrag<(ops node:$reg),
(and node:$reg, vsplati64_imm_eq_63)>;
foreach N = [3, 4, 5, 6, 8] in
- def SplatPat_uimm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#">",
+ def SplatPat_uimm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", 0>",
[build_vector, bitconvert], [], 2>;
foreach N = [5] in
- def SplatPat_simm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", true>",
+ def SplatPat_simm#N : ComplexPattern<vAny, 1, "selectVSplatImm<"#N#", 0, true>",
[build_vector, bitconvert]>;
+def vsplat_i8_uimm8 : ComplexPattern<vAny, 1, "selectVSplatImm<8, 8>",
+ [build_vector, bitconvert]>;
+
def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
[build_vector, bitconvert]>;
@@ -1622,15 +1625,39 @@ def : Pat<(or (vt LSX128:$vj), (vt (vnot LSX128:$vk))),
// VANDI_B
def : Pat<(and (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
(VANDI_B LSX128:$vj, uimm8:$imm)>;
+def : Pat<(and (v8i16 LSX128:$vj), (v8i16 (vsplat_i8_uimm8 imm16:$imm))),
+ (VANDI_B LSX128:$vj, imm16:$imm)>;
+def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_i8_uimm8 imm32:$imm))),
+ (VANDI_B LSX128:$vj, imm32:$imm)>;
+def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_i8_uimm8 imm64:$imm))),
+ (VANDI_B LSX128:$vj, imm64:$imm)>;
// VORI_B
def : Pat<(or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
(VORI_B LSX128:$vj, uimm8:$imm)>;
+def : Pat<(or (v8i16 LSX128:$vj), (v8i16 (vsplat_i8_uimm8 imm16:$imm))),
+ (VORI_B LSX128:$vj, imm16:$imm)>;
+def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_i8_uimm8 imm32:$imm))),
+ (VORI_B LSX128:$vj, imm32:$imm)>;
+def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_i8_uimm8 imm64:$imm))),
+ (VORI_B LSX128:$vj, imm64:$imm)>;
// VXORI_B
def : Pat<(xor (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm))),
(VXORI_B LSX128:$vj, uimm8:$imm)>;
+def : Pat<(xor (v8i16 LSX128:$vj), (v8i16 (vsplat_i8_uimm8 imm16:$imm))),
+ (VXORI_B LSX128:$vj, imm16:$imm)>;
+def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_i8_uimm8 imm32:$imm))),
+ (VXORI_B LSX128:$vj, imm32:$imm)>;
+def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_i8_uimm8 imm64:$imm))),
+ (VXORI_B LSX128:$vj, imm64:$imm)>;
// VNORI_B
def : Pat<(vnot (or (v16i8 LSX128:$vj), (v16i8 (SplatPat_uimm8 uimm8:$imm)))),
(VNORI_B LSX128:$vj, uimm8:$imm)>;
+def : Pat<(vnot (or (v8i16 LSX128:$vj), (v8i16 (vsplat_i8_uimm8 imm16:$imm)))),
+ (VNORI_B LSX128:$vj, imm16:$imm)>;
+def : Pat<(vnot (or (v4i32 LSX128:$vj), (v4i32 (vsplat_i8_uimm8 imm32:$imm)))),
+ (VNORI_B LSX128:$vj, imm32:$imm)>;
+def : Pat<(vnot (or (v2i64 LSX128:$vj), (v2i64 (vsplat_i8_uimm8 imm64:$imm)))),
+ (VNORI_B LSX128:$vj, imm64:$imm)>;
// VBSLL_V
foreach vt = [v16i8, v8i16, v4i32, v2i64, v4f32,
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
index b3155c9313a8a..9b40d400c9970 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ctpop-ctlz.ll
@@ -125,8 +125,7 @@ define void @not_ctlz_v16i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: not_ctlz_v16i16:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
; CHECK-NEXT: xvclz.h $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
@@ -141,8 +140,7 @@ define void @not_ctlz_v8i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: not_ctlz_v8i32:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
; CHECK-NEXT: xvclz.w $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
@@ -157,8 +155,7 @@ define void @not_ctlz_v4i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: not_ctlz_v4i64:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a0, 0
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
; CHECK-NEXT: xvclz.d $xr0, $xr0
; CHECK-NEXT: xvst $xr0, $a1, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
index 91f50f28464d2..7bd56e427c1cb 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/and.ll
@@ -128,8 +128,7 @@ entry:
define <16 x i16> @and_u_v16i16_1(<16 x i16> %a) nounwind {
; CHECK-LABEL: and_u_v16i16_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvandi.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = and <16 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
define <8 x i32> @and_u_v8i32_1(<8 x i32> %a) nounwind {
; CHECK-LABEL: and_u_v8i32_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvandi.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = and <8 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
define <4 x i64> @and_u_v4i64_1(<4 x i64> %a) nounwind {
; CHECK-LABEL: and_u_v4i64_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvandi.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = and <4 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
index 47229fc9a0fc4..4ea54abacc6f6 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/icmp.ll
@@ -631,8 +631,7 @@ define void @v16i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: xvseq.h $xr0, $xr0, $xr1
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i16>, ptr %a0
@@ -649,8 +648,7 @@ define void @v8i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: xvseq.w $xr0, $xr0, $xr1
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i32>, ptr %a0
@@ -667,8 +665,7 @@ define void @v4i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvld $xr1, $a2, 0
; CHECK-NEXT: xvseq.d $xr0, $xr0, $xr1
-; CHECK-NEXT: xvrepli.b $xr1, -1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 255
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i64>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/nor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/nor.ll
index 691b98b0b1646..0aa3fde735e5c 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/nor.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/nor.ll
@@ -136,8 +136,7 @@ entry:
define <16 x i16> @nor_u_v16i16_1(<16 x i16> %a) nounwind {
; CHECK-LABEL: nor_u_v16i16_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <16 x i16> %a, splat (i16 257)
@@ -148,8 +147,7 @@ entry:
define <8 x i32> @nor_u_v8i32_1(<8 x i32> %a) nounwind {
; CHECK-LABEL: nor_u_v8i32_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <8 x i32> %a, splat (i32 16843009)
@@ -160,8 +158,7 @@ entry:
define <4 x i64> @nor_u_v4i64_1(<4 x i64> %a) nounwind {
; CHECK-LABEL: nor_u_v4i64_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvnor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvnori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <4 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
index a6f5e473d643d..174a0f1562bed 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/or.ll
@@ -128,8 +128,7 @@ entry:
define <16 x i16> @or_u_v16i16_1(<16 x i16> %a) nounwind {
; CHECK-LABEL: or_u_v16i16_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <16 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
define <8 x i32> @or_u_v8i32_1(<8 x i32> %a) nounwind {
; CHECK-LABEL: or_u_v8i32_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <8 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
define <4 x i64> @or_u_v4i64_1(<4 x i64> %a) nounwind {
; CHECK-LABEL: or_u_v4i64_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <4 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
index a518148fe696c..3f4c65590971d 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/ir-instruction/xor.ll
@@ -128,8 +128,7 @@ entry:
define <16 x i16> @xor_u_v16i16_1(<16 x i16> %a) nounwind {
; CHECK-LABEL: xor_u_v16i16_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = xor <16 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
define <8 x i32> @xor_u_v8i32_1(<8 x i32> %a) nounwind {
; CHECK-LABEL: xor_u_v8i32_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = xor <8 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
define <4 x i64> @xor_u_v4i64_1(<4 x i64> %a) nounwind {
; CHECK-LABEL: xor_u_v4i64_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.b $xr1, 1
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvxori.b $xr0, $xr0, 1
; CHECK-NEXT: ret
entry:
%0 = xor <4 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
index 09908f619fa1f..ca61e69f1b378 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/xvmskcond.ll
@@ -863,8 +863,7 @@ define i8 @xvmsk_ne_v4i32_concat_poison(<4 x i32> %vec) {
; CHECK-LABEL: xvmsk_ne_v4i32_concat_poison:
; CHECK: # %bb.0:
; CHECK-NEXT: vseqi.w $vr0, $vr0, 0
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 0
; CHECK-NEXT: vinsgr2vr.h $vr1, $a0, 0
; CHECK-NEXT: vpickve2gr.w $a0, $vr0, 1
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
index 6ac7d51de253b..fe49a4c9e9cab 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ctpop-ctlz.ll
@@ -125,8 +125,7 @@ define void @not_ctlz_v8i16(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: not_ctlz_v8i16:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
; CHECK-NEXT: vclz.h $vr0, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
@@ -141,8 +140,7 @@ define void @not_ctlz_v4i32(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: not_ctlz_v4i32:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
; CHECK-NEXT: vclz.w $vr0, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
@@ -157,8 +155,7 @@ define void @not_ctlz_v2i64(ptr %src, ptr %dst) nounwind {
; CHECK-LABEL: not_ctlz_v2i64:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a0, 0
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
; CHECK-NEXT: vclz.d $vr0, $vr0
; CHECK-NEXT: vst $vr0, $a1, 0
; CHECK-NEXT: ret
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
index f90ae0d34cb8c..1e20b31b7c453 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/and.ll
@@ -128,8 +128,7 @@ entry:
define <8 x i16> @and_u_v8i16_1(<8 x i16> %a) nounwind {
; CHECK-LABEL: and_u_v8i16_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vandi.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = and <8 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
define <4 x i32> @and_u_v4i32_1(<4 x i32> %a) nounwind {
; CHECK-LABEL: and_u_v4i32_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vandi.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = and <4 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
define <2 x i64> @and_u_v2i64_1(<2 x i64> %a) nounwind {
; CHECK-LABEL: and_u_v2i64_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vandi.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = and <2 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
index 7166469bf5cee..859d604549454 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/icmp.ll
@@ -631,8 +631,7 @@ define void @v8i16_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vseq.h $vr0, $vr0, $vr1
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i16>, ptr %a0
@@ -649,8 +648,7 @@ define void @v4i32_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vseq.w $vr0, $vr0, $vr1
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i32>, ptr %a0
@@ -667,8 +665,7 @@ define void @v2i64_icmp_ne(ptr %res, ptr %a0, ptr %a1) nounwind {
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vld $vr1, $a2, 0
; CHECK-NEXT: vseq.d $vr0, $vr0, $vr1
-; CHECK-NEXT: vrepli.b $vr1, -1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 255
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <2 x i64>, ptr %a0
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/nor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/nor.ll
index a23899c81ca04..f297262010951 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/nor.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/nor.ll
@@ -136,8 +136,7 @@ entry:
define <8 x i16> @nor_u_v8i16_1(<8 x i16> %a) nounwind {
; CHECK-LABEL: nor_u_v8i16_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <8 x i16> %a, splat (i16 257)
@@ -148,8 +147,7 @@ entry:
define <4 x i32> @nor_u_v4i32_1(<4 x i32> %a) nounwind {
; CHECK-LABEL: nor_u_v4i32_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <4 x i32> %a, splat (i32 16843009)
@@ -160,8 +158,7 @@ entry:
define <2 x i64> @nor_u_v2i64_1(<2 x i64> %a) nounwind {
; CHECK-LABEL: nor_u_v2i64_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vnor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vnori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <2 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
index 7188783b0ce56..3bc845bd8c416 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/or.ll
@@ -128,8 +128,7 @@ entry:
define <8 x i16> @or_u_v8i16_1(<8 x i16> %a) nounwind {
; CHECK-LABEL: or_u_v8i16_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <8 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
define <4 x i32> @or_u_v4i32_1(<4 x i32> %a) nounwind {
; CHECK-LABEL: or_u_v4i32_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <4 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
define <2 x i64> @or_u_v2i64_1(<2 x i64> %a) nounwind {
; CHECK-LABEL: or_u_v2i64_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = or <2 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
index e7298fc28273e..e4a087218995d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/ir-instruction/xor.ll
@@ -128,8 +128,7 @@ entry:
define <8 x i16> @xor_u_v8i16_1(<8 x i16> %a) nounwind {
; CHECK-LABEL: xor_u_v8i16_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = xor <8 x i16> %a, splat (i16 257)
@@ -139,8 +138,7 @@ entry:
define <4 x i32> @xor_u_v4i32_1(<4 x i32> %a) nounwind {
; CHECK-LABEL: xor_u_v4i32_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = xor <4 x i32> %a, splat (i32 16843009)
@@ -150,8 +148,7 @@ entry:
define <2 x i64> @xor_u_v2i64_1(<2 x i64> %a) nounwind {
; CHECK-LABEL: xor_u_v2i64_1:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.b $vr1, 1
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vxori.b $vr0, $vr0, 1
; CHECK-NEXT: ret
entry:
%0 = xor <2 x i64> %a, splat (i64 72340172838076673)
diff --git a/llvm/test/CodeGen/LoongArch/pr177863.ll b/llvm/test/CodeGen/LoongArch/pr177863.ll
index 8edbd33ace133..54fa02162ee0d 100644
--- a/llvm/test/CodeGen/LoongArch/pr177863.ll
+++ b/llvm/test/CodeGen/LoongArch/pr177863.ll
@@ -15,8 +15,7 @@ define <4 x i1> @test(<4 x i64> %shuffle2, <4 x i64> %shuffle4) {
; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 2
; LA32-NEXT: xvpickve2gr.w $a0, $xr0, 6
; LA32-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA32-NEXT: vrepli.b $vr0, -1
-; LA32-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vxori.b $vr0, $vr1, 255
; LA32-NEXT: ret
;
; LA64-LABEL: test:
@@ -30,8 +29,7 @@ define <4 x i1> @test(<4 x i64> %shuffle2, <4 x i64> %shuffle4) {
; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 2
; LA64-NEXT: xvpickve2gr.d $a0, $xr0, 3
; LA64-NEXT: vinsgr2vr.w $vr1, $a0, 3
-; LA64-NEXT: vrepli.b $vr0, -1
-; LA64-NEXT: vxor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vxori.b $vr0, $vr1, 255
; LA64-NEXT: ret
entry:
%conv5 = trunc nuw <4 x i64> %shuffle4 to <4 x i32>
More information about the llvm-branch-commits
mailing list