[llvm-branch-commits] [llvm] [LoongArch] Support VBIT{CLR, SET, REV}I patterns for non-native element sizes (PR #193719)
via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Apr 27 02:08:17 PDT 2026
https://github.com/heiher updated https://github.com/llvm/llvm-project/pull/193719
>From 257371eb98298a2d58b982ed0893823d9bff2f51 Mon Sep 17 00:00:00 2001
From: WANG Rui <wangrui at loongson.cn>
Date: Thu, 23 Apr 2026 14:26:20 +0800
Subject: [PATCH 1/2] [LoongArch] Support VBIT{CLR,SET,REV}I patterns for
non-native element sizes
Extend vsplat_uimm_{pow2,inv_pow2} matching to allow specifying an explicit
element bit width, enabling recognition of splat patterns whose logical
element size differs from the vector's native element type.
Turn the selectVSplatUimm{Pow2,InvPow2} helpers into templates with an
optional EltSize parameter (defaulting to 0, which keeps the vector's native
element width), and add corresponding ComplexPattern definitions
(vsplat_i{8,16,32}_{pow2,inv_pow2}) for i8/i16/i32 element widths. This
allows TableGen patterns to match cases such as operating on v8i32/v4i64
vectors with masks derived from smaller element sizes (e.g. i16).
With these changes, AND/OR/XOR operations using inverse power-of-two or
power-of-two splat masks are now selected as single VBITCLRI, VBITSETI, and
VBITREVI instructions instead of falling back to vector logical operations
with materialized constants.
---
.../LoongArch/LoongArchISelDAGToDAG.cpp | 12 ++++---
.../Target/LoongArch/LoongArchISelDAGToDAG.h | 3 +-
.../LoongArch/LoongArchLASXInstrInfo.td | 27 ++++++++++++++
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 35 +++++++++++++++++++
llvm/test/CodeGen/LoongArch/lasx/bitclr.ll | 9 ++---
llvm/test/CodeGen/LoongArch/lasx/bitrev.ll | 9 ++---
llvm/test/CodeGen/LoongArch/lasx/bitset.ll | 9 ++---
llvm/test/CodeGen/LoongArch/lsx/bitclr.ll | 9 ++---
llvm/test/CodeGen/LoongArch/lsx/bitrev.ll | 9 ++---
llvm/test/CodeGen/LoongArch/lsx/bitset.ll | 9 ++---
10 files changed, 90 insertions(+), 41 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index bfe2b38c56d39..924f360a3ea4d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -447,16 +447,18 @@ bool LoongArchDAGToDAGISel::selectVSplatImmNeg(SDValue N,
return false;
}
+template <unsigned EltBitSize>
bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
SDValue &SplatImm) const {
APInt ImmValue;
EVT EltTy = N->getValueType(0).getVectorElementType();
+ unsigned EltBitWidth = EltBitSize ? EltBitSize : EltTy.getSizeInBits();
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0);
- if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
- ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ if (selectVSplat(N.getNode(), ImmValue, EltBitWidth) &&
+ ImmValue.getBitWidth() == EltBitWidth) {
int32_t Log2 = (~ImmValue).exactLogBase2();
if (Log2 != -1) {
@@ -468,16 +470,18 @@ bool LoongArchDAGToDAGISel::selectVSplatUimmInvPow2(SDValue N,
return false;
}
+template <unsigned EltBitSize>
bool LoongArchDAGToDAGISel::selectVSplatUimmPow2(SDValue N,
SDValue &SplatImm) const {
APInt ImmValue;
EVT EltTy = N->getValueType(0).getVectorElementType();
+ unsigned EltBitWidth = EltBitSize ? EltBitSize : EltTy.getSizeInBits();
if (N->getOpcode() == ISD::BITCAST)
N = N->getOperand(0);
- if (selectVSplat(N.getNode(), ImmValue, EltTy.getSizeInBits()) &&
- ImmValue.getBitWidth() == EltTy.getSizeInBits()) {
+ if (selectVSplat(N.getNode(), ImmValue, EltBitWidth) &&
+ ImmValue.getBitWidth() == EltBitWidth) {
int32_t Log2 = ImmValue.exactLogBase2();
if (Log2 != -1) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 12418b81f2d7b..ae56e6771bb22 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -64,8 +64,9 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
bool selectVSplatImm(SDValue N, SDValue &SplatVal);
template <unsigned ImmSize>
bool selectVSplatImmNeg(SDValue N, SDValue &SplatVal) const;
-
+ template <unsigned EltSize = 0>
bool selectVSplatUimmInvPow2(SDValue N, SDValue &SplatImm) const;
+ template <unsigned EltSize = 0>
bool selectVSplatUimmPow2(SDValue N, SDValue &SplatImm) const;
// Return the LoongArch branch opcode that matches the given DAG integer
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index b807b5927585b..336456cfd0b15 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1517,6 +1517,15 @@ def : Pat<(and (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_inv_pow2 uimm5:$imm))),
def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
(XVBITCLRI_D LASX256:$xj, uimm6:$imm)>;
+foreach vt = [v16i16, v8i32, v4i64] in {
+ def : Pat<(and (vt LASX256:$vj), (vt (vsplat_i8_inv_pow2 grlenimm:$imm))),
+ (XVBITCLRI_B LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(and (vt LASX256:$vj), (vt (vsplat_i16_inv_pow2 grlenimm:$imm))),
+ (XVBITCLRI_H LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(and (vt LASX256:$vj), (vt (vsplat_i32_inv_pow2 grlenimm:$imm))),
+ (XVBITCLRI_W LASX256:$vj, grlenimm:$imm)>;
+}
+
// XVBITSET_{B/H/W/D}
def : Pat<(or v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)),
(v32i8 (XVBITSET_B v32i8:$xj, v32i8:$xk))>;
@@ -1545,6 +1554,15 @@ def : Pat<(or (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
(XVBITSETI_D LASX256:$xj, uimm6:$imm)>;
+foreach vt = [v16i16, v8i32, v4i64] in {
+ def : Pat<(or (vt LASX256:$vj), (vt (vsplat_i8_pow2 grlenimm:$imm))),
+ (XVBITSETI_B LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(or (vt LASX256:$vj), (vt (vsplat_i16_pow2 grlenimm:$imm))),
+ (XVBITSETI_H LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(or (vt LASX256:$vj), (vt (vsplat_i32_pow2 grlenimm:$imm))),
+ (XVBITSETI_W LASX256:$vj, grlenimm:$imm)>;
+}
+
// XVBITREV_{B/H/W/D}
def : Pat<(xor v32i8:$xj, (shl vsplat_imm_eq_1, v32i8:$xk)),
(v32i8 (XVBITREV_B v32i8:$xj, v32i8:$xk))>;
@@ -1573,6 +1591,15 @@ def : Pat<(xor (v8i32 LASX256:$xj), (v8i32 (vsplat_uimm_pow2 uimm5:$imm))),
def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
(XVBITREVI_D LASX256:$xj, uimm6:$imm)>;
+foreach vt = [v16i16, v8i32, v4i64] in {
+ def : Pat<(xor (vt LASX256:$vj), (vt (vsplat_i8_pow2 grlenimm:$imm))),
+ (XVBITREVI_B LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(xor (vt LASX256:$vj), (vt (vsplat_i16_pow2 grlenimm:$imm))),
+ (XVBITREVI_H LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(xor (vt LASX256:$vj), (vt (vsplat_i32_pow2 grlenimm:$imm))),
+ (XVBITREVI_W LASX256:$vj, grlenimm:$imm)>;
+}
+
// Vector bswaps
def : Pat<(bswap (v16i16 LASX256:$xj)), (XVSHUF4I_B LASX256:$xj, 0b10110001)>;
def : Pat<(bswap (v8i32 LASX256:$xj)), (XVSHUF4I_B LASX256:$xj, 0b00011011)>;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 92d3214363f7b..261774aeeb3e0 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -217,9 +217,17 @@ def vsplat_i8_uimm8 : ComplexPattern<vAny, 1, "selectVSplatImm<8, 8>",
def vsplat_uimm_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2",
[build_vector, bitconvert]>;
+foreach N = [8, 16, 32] in
+def vsplat_i#N#_inv_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmInvPow2<"#N#">",
+ [build_vector, bitconvert]>;
+
def vsplat_uimm_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmPow2",
[build_vector, bitconvert]>;
+foreach N = [8, 16, 32] in
+def vsplat_i#N#_pow2 : ComplexPattern<vAny, 1, "selectVSplatUimmPow2<"#N#">",
+ [build_vector, bitconvert]>;
+
def muladd : PatFrag<(ops node:$vd, node:$vj, node:$vk),
(add node:$vd, (mul node:$vj, node:$vk))>;
@@ -1738,6 +1746,15 @@ def : Pat<(and (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_inv_pow2 uimm5:$imm))),
def : Pat<(and (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
(VBITCLRI_D LSX128:$vj, uimm6:$imm)>;
+foreach vt = [v8i16, v4i32, v2i64] in {
+ def : Pat<(and (vt LSX128:$vj), (vt (vsplat_i8_inv_pow2 grlenimm:$imm))),
+ (VBITCLRI_B LSX128:$vj, grlenimm:$imm)>;
+ def : Pat<(and (vt LSX128:$vj), (vt (vsplat_i16_inv_pow2 grlenimm:$imm))),
+ (VBITCLRI_H LSX128:$vj, grlenimm:$imm)>;
+ def : Pat<(and (vt LSX128:$vj), (vt (vsplat_i32_inv_pow2 grlenimm:$imm))),
+ (VBITCLRI_W LSX128:$vj, grlenimm:$imm)>;
+}
+
// VBITSET_{B/H/W/D}
def : Pat<(or v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
(v16i8 (VBITSET_B v16i8:$vj, v16i8:$vk))>;
@@ -1766,6 +1783,15 @@ def : Pat<(or (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
def : Pat<(or (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
(VBITSETI_D LSX128:$vj, uimm6:$imm)>;
+foreach vt = [v8i16, v4i32, v2i64] in {
+ def : Pat<(or (vt LSX128:$vj), (vt (vsplat_i8_pow2 grlenimm:$imm))),
+ (VBITSETI_B LSX128:$vj, grlenimm:$imm)>;
+ def : Pat<(or (vt LSX128:$vj), (vt (vsplat_i16_pow2 grlenimm:$imm))),
+ (VBITSETI_H LSX128:$vj, grlenimm:$imm)>;
+ def : Pat<(or (vt LSX128:$vj), (vt (vsplat_i32_pow2 grlenimm:$imm))),
+ (VBITSETI_W LSX128:$vj, grlenimm:$imm)>;
+}
+
// VBITREV_{B/H/W/D}
def : Pat<(xor v16i8:$vj, (shl vsplat_imm_eq_1, v16i8:$vk)),
(v16i8 (VBITREV_B v16i8:$vj, v16i8:$vk))>;
@@ -1794,6 +1820,15 @@ def : Pat<(xor (v4i32 LSX128:$vj), (v4i32 (vsplat_uimm_pow2 uimm5:$imm))),
def : Pat<(xor (v2i64 LSX128:$vj), (v2i64 (vsplat_uimm_pow2 uimm6:$imm))),
(VBITREVI_D LSX128:$vj, uimm6:$imm)>;
+foreach vt = [v8i16, v4i32, v2i64] in {
+ def : Pat<(xor (vt LSX128:$vj), (vt (vsplat_i8_pow2 grlenimm:$imm))),
+ (VBITREVI_B LSX128:$vj, grlenimm:$imm)>;
+ def : Pat<(xor (vt LSX128:$vj), (vt (vsplat_i16_pow2 grlenimm:$imm))),
+ (VBITREVI_H LSX128:$vj, grlenimm:$imm)>;
+ def : Pat<(xor (vt LSX128:$vj), (vt (vsplat_i32_pow2 grlenimm:$imm))),
+ (VBITREVI_W LSX128:$vj, grlenimm:$imm)>;
+}
+
// Vector bswaps
def : Pat<(bswap (v8i16 LSX128:$vj)), (VSHUF4I_B LSX128:$vj, 0b10110001)>;
def : Pat<(bswap (v4i32 LSX128:$vj)), (VSHUF4I_B LSX128:$vj, 0b00011011)>;
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitclr.ll b/llvm/test/CodeGen/LoongArch/lasx/bitclr.ll
index f4b6990a8d932..a9f110f474bf4 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/bitclr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/bitclr.ll
@@ -55,8 +55,7 @@ entry:
define <8 x i32> @bitclri_v16i16_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: bitclri_v16i16_v8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.h $xr1, -5
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitclri.h $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = and <8 x i32> %a, splat (i32 -262149)
@@ -76,8 +75,7 @@ entry:
define <4 x i64> @bitclri_v16i16_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: bitclri_v16i16_v4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.h $xr1, -5
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitclri.h $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = and <4 x i64> %a, splat (i64 -1125917086973957)
@@ -87,8 +85,7 @@ entry:
define <4 x i64> @bitclri_v8i32_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: bitclri_v8i32_v4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.w $xr1, -5
-; CHECK-NEXT: xvand.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitclri.w $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = and <4 x i64> %a, splat (i64 -17179869189)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitrev.ll b/llvm/test/CodeGen/LoongArch/lasx/bitrev.ll
index 3f21722b454f0..ed3320d983dcc 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/bitrev.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/bitrev.ll
@@ -55,8 +55,7 @@ entry:
define <8 x i32> @bitrevi_v16i16_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: bitrevi_v16i16_v8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.h $xr1, 4
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitrevi.h $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = xor <8 x i32> %a, splat (i32 262148)
@@ -76,8 +75,7 @@ entry:
define <4 x i64> @bitrevi_v16i16_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: bitrevi_v16i16_v4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.h $xr1, 4
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitrevi.h $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = xor <4 x i64> %a, splat (i64 1125917086973956)
@@ -87,8 +85,7 @@ entry:
define <4 x i64> @bitrevi_v8i32_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: bitrevi_v8i32_v4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.w $xr1, 4
-; CHECK-NEXT: xvxor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitrevi.w $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = xor <4 x i64> %a, splat (i64 17179869188)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/bitset.ll b/llvm/test/CodeGen/LoongArch/lasx/bitset.ll
index 517f0629a5242..03ac5560d2aee 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/bitset.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/bitset.ll
@@ -55,8 +55,7 @@ entry:
define <8 x i32> @bitseti_v16i16_v8i32(<8 x i32> %a) nounwind {
; CHECK-LABEL: bitseti_v16i16_v8i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.h $xr1, 4
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = or <8 x i32> %a, splat (i32 262148)
@@ -76,8 +75,7 @@ entry:
define <4 x i64> @bitseti_v16i16_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: bitseti_v16i16_v4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.h $xr1, 4
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitseti.h $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = or <4 x i64> %a, splat (i64 1125917086973956)
@@ -87,8 +85,7 @@ entry:
define <4 x i64> @bitseti_v8i32_v4i64(<4 x i64> %a) nounwind {
; CHECK-LABEL: bitseti_v8i32_v4i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: xvrepli.w $xr1, 4
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvbitseti.w $xr0, $xr0, 2
; CHECK-NEXT: ret
entry:
%0 = or <4 x i64> %a, splat (i64 17179869188)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitclr.ll b/llvm/test/CodeGen/LoongArch/lsx/bitclr.ll
index cc41e2fa213fa..fa6ce6db4f8a9 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/bitclr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/bitclr.ll
@@ -55,8 +55,7 @@ entry:
define <4 x i32> @bitclri_v8i16_v4i32(<4 x i32> %a) nounwind {
; CHECK-LABEL: bitclri_v8i16_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.h $vr1, -5
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitclri.h $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = and <4 x i32> %a, splat (i32 -262149)
@@ -76,8 +75,7 @@ entry:
define <2 x i64> @bitclri_v8i16_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: bitclri_v8i16_v2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.h $vr1, -5
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitclri.h $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = and <2 x i64> %a, splat (i64 -1125917086973957)
@@ -87,8 +85,7 @@ entry:
define <2 x i64> @bitclri_v4i32_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: bitclri_v4i32_v2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.w $vr1, -5
-; CHECK-NEXT: vand.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitclri.w $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = and <2 x i64> %a, splat (i64 -17179869189)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitrev.ll b/llvm/test/CodeGen/LoongArch/lsx/bitrev.ll
index 9d5685f4679ee..be68b501192c9 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/bitrev.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/bitrev.ll
@@ -55,8 +55,7 @@ entry:
define <4 x i32> @bitrevi_v8i16_v4i32(<4 x i32> %a) nounwind {
; CHECK-LABEL: bitrevi_v8i16_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.h $vr1, 4
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitrevi.h $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = xor <4 x i32> %a, splat (i32 262148)
@@ -76,8 +75,7 @@ entry:
define <2 x i64> @bitrevi_v8i16_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: bitrevi_v8i16_v2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.h $vr1, 4
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitrevi.h $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = xor <2 x i64> %a, splat (i64 1125917086973956)
@@ -87,8 +85,7 @@ entry:
define <2 x i64> @bitrevi_v4i32_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: bitrevi_v4i32_v2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.w $vr1, 4
-; CHECK-NEXT: vxor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitrevi.w $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = xor <2 x i64> %a, splat (i64 17179869188)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/bitset.ll b/llvm/test/CodeGen/LoongArch/lsx/bitset.ll
index 79ac3a86c9613..7896f9235987d 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/bitset.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/bitset.ll
@@ -55,8 +55,7 @@ entry:
define <4 x i32> @bitseti_v8i16_v4i32(<4 x i32> %a) nounwind {
; CHECK-LABEL: bitseti_v8i16_v4i32:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.h $vr1, 4
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitseti.h $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = or <4 x i32> %a, splat (i32 262148)
@@ -76,8 +75,7 @@ entry:
define <2 x i64> @bitseti_v8i16_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: bitseti_v8i16_v2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.h $vr1, 4
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitseti.h $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = or <2 x i64> %a, splat (i64 1125917086973956)
@@ -87,8 +85,7 @@ entry:
define <2 x i64> @bitseti_v4i32_v2i64(<2 x i64> %a) nounwind {
; CHECK-LABEL: bitseti_v4i32_v2i64:
; CHECK: # %bb.0: # %entry
-; CHECK-NEXT: vrepli.w $vr1, 4
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vbitseti.w $vr0, $vr0, 2
; CHECK-NEXT: ret
entry:
%0 = or <2 x i64> %a, splat (i64 17179869188)
>From 72192b8dc8bcf851a7955bfbc368d42c2446135e Mon Sep 17 00:00:00 2001
From: WANG Rui <r at hev.cc>
Date: Mon, 27 Apr 2026 17:04:37 +0800
Subject: [PATCH 2/2] Address wanglei's comments: use $xj instead of $vj in LASX patterns
---
.../LoongArch/LoongArchLASXInstrInfo.td | 36 +++++++++----------
1 file changed, 18 insertions(+), 18 deletions(-)
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index 336456cfd0b15..aadd567ffa6cf 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1518,12 +1518,12 @@ def : Pat<(and (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_inv_pow2 uimm6:$imm))),
(XVBITCLRI_D LASX256:$xj, uimm6:$imm)>;
foreach vt = [v16i16, v8i32, v4i64] in {
- def : Pat<(and (vt LASX256:$vj), (vt (vsplat_i8_inv_pow2 grlenimm:$imm))),
- (XVBITCLRI_B LASX256:$vj, grlenimm:$imm)>;
- def : Pat<(and (vt LASX256:$vj), (vt (vsplat_i16_inv_pow2 grlenimm:$imm))),
- (XVBITCLRI_H LASX256:$vj, grlenimm:$imm)>;
- def : Pat<(and (vt LASX256:$vj), (vt (vsplat_i32_inv_pow2 grlenimm:$imm))),
- (XVBITCLRI_W LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(and (vt LASX256:$xj), (vt (vsplat_i8_inv_pow2 grlenimm:$imm))),
+ (XVBITCLRI_B LASX256:$xj, grlenimm:$imm)>;
+ def : Pat<(and (vt LASX256:$xj), (vt (vsplat_i16_inv_pow2 grlenimm:$imm))),
+ (XVBITCLRI_H LASX256:$xj, grlenimm:$imm)>;
+ def : Pat<(and (vt LASX256:$xj), (vt (vsplat_i32_inv_pow2 grlenimm:$imm))),
+ (XVBITCLRI_W LASX256:$xj, grlenimm:$imm)>;
}
// XVBITSET_{B/H/W/D}
@@ -1555,12 +1555,12 @@ def : Pat<(or (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
(XVBITSETI_D LASX256:$xj, uimm6:$imm)>;
foreach vt = [v16i16, v8i32, v4i64] in {
- def : Pat<(or (vt LASX256:$vj), (vt (vsplat_i8_pow2 grlenimm:$imm))),
- (XVBITSETI_B LASX256:$vj, grlenimm:$imm)>;
- def : Pat<(or (vt LASX256:$vj), (vt (vsplat_i16_pow2 grlenimm:$imm))),
- (XVBITSETI_H LASX256:$vj, grlenimm:$imm)>;
- def : Pat<(or (vt LASX256:$vj), (vt (vsplat_i32_pow2 grlenimm:$imm))),
- (XVBITSETI_W LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(or (vt LASX256:$xj), (vt (vsplat_i8_pow2 grlenimm:$imm))),
+ (XVBITSETI_B LASX256:$xj, grlenimm:$imm)>;
+ def : Pat<(or (vt LASX256:$xj), (vt (vsplat_i16_pow2 grlenimm:$imm))),
+ (XVBITSETI_H LASX256:$xj, grlenimm:$imm)>;
+ def : Pat<(or (vt LASX256:$xj), (vt (vsplat_i32_pow2 grlenimm:$imm))),
+ (XVBITSETI_W LASX256:$xj, grlenimm:$imm)>;
}
// XVBITREV_{B/H/W/D}
@@ -1592,12 +1592,12 @@ def : Pat<(xor (v4i64 LASX256:$xj), (v4i64 (vsplat_uimm_pow2 uimm6:$imm))),
(XVBITREVI_D LASX256:$xj, uimm6:$imm)>;
foreach vt = [v16i16, v8i32, v4i64] in {
- def : Pat<(xor (vt LASX256:$vj), (vt (vsplat_i8_pow2 grlenimm:$imm))),
- (XVBITREVI_B LASX256:$vj, grlenimm:$imm)>;
- def : Pat<(xor (vt LASX256:$vj), (vt (vsplat_i16_pow2 grlenimm:$imm))),
- (XVBITREVI_H LASX256:$vj, grlenimm:$imm)>;
- def : Pat<(xor (vt LASX256:$vj), (vt (vsplat_i32_pow2 grlenimm:$imm))),
- (XVBITREVI_W LASX256:$vj, grlenimm:$imm)>;
+ def : Pat<(xor (vt LASX256:$xj), (vt (vsplat_i8_pow2 grlenimm:$imm))),
+ (XVBITREVI_B LASX256:$xj, grlenimm:$imm)>;
+ def : Pat<(xor (vt LASX256:$xj), (vt (vsplat_i16_pow2 grlenimm:$imm))),
+ (XVBITREVI_H LASX256:$xj, grlenimm:$imm)>;
+ def : Pat<(xor (vt LASX256:$xj), (vt (vsplat_i32_pow2 grlenimm:$imm))),
+ (XVBITREVI_W LASX256:$xj, grlenimm:$imm)>;
}
// Vector bswaps
More information about the llvm-branch-commits
mailing list