[llvm-branch-commits] [llvm] [LoongArch] Make rotl/rotr custom for lsx/lasx (PR #161154)
Zhaoxin Yang via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Mon Sep 29 02:13:43 PDT 2025
https://github.com/ylzsx created https://github.com/llvm/llvm-project/pull/161154
None
From f2affc005ca009009a9cfbb190f7dc0d9c661520 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Mon, 29 Sep 2025 16:26:03 +0800
Subject: [PATCH 1/2] [LoongArch] Make rotl/rotr custom for lsx/lasx
---
.../LoongArch/LoongArchISelLowering.cpp | 60 +++++++++++++++++++
.../Target/LoongArch/LoongArchISelLowering.h | 1 +
.../LoongArch/LoongArchLASXInstrInfo.td | 5 ++
.../Target/LoongArch/LoongArchLSXInstrInfo.td | 5 ++
4 files changed, 71 insertions(+)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 94f53d5b85f10..073b2ddcd049e 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -346,6 +346,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SSUBSAT, VT, Legal);
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
}
for (MVT VT : {MVT::v16i8, MVT::v8i16, MVT::v4i32})
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -426,6 +428,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::UADDSAT, VT, Legal);
setOperationAction(ISD::USUBSAT, VT, Legal);
setOperationAction(ISD::VECREDUCE_ADD, VT, Custom);
+ setOperationAction(ISD::ROTL, VT, Custom);
+ setOperationAction(ISD::ROTR, VT, Custom);
}
for (MVT VT : {MVT::v32i8, MVT::v16i16, MVT::v8i32})
setOperationAction(ISD::BITREVERSE, VT, Custom);
@@ -580,6 +584,9 @@ SDValue LoongArchTargetLowering::LowerOperation(SDValue Op,
return lowerBF16_TO_FP(Op, DAG);
case ISD::VECREDUCE_ADD:
return lowerVECREDUCE_ADD(Op, DAG);
+ case ISD::ROTL:
+ case ISD::ROTR:
+ return lowerRotate(Op, DAG);
case ISD::VECREDUCE_AND:
case ISD::VECREDUCE_OR:
case ISD::VECREDUCE_XOR:
@@ -753,6 +760,59 @@ SDValue LoongArchTargetLowering::lowerPREFETCH(SDValue Op,
return Op;
}
+SDValue LoongArchTargetLowering::lowerRotate(SDValue Op,
+ SelectionDAG &DAG) const {
+ MVT VT = Op.getSimpleValueType();
+ if (!VT.isVector())
+ return Op;
+
+ SDLoc DL(Op);
+ SDValue R = Op.getOperand(0);
+ SDValue Amt = Op.getOperand(1);
+ unsigned Opcode = Op.getOpcode();
+ unsigned EltSizeInBits = VT.getScalarSizeInBits();
+
+ auto checkCstSplat = [](SDValue V, APInt &CstSplatValue) {
+ if (V.getOpcode() != ISD::BUILD_VECTOR)
+ return false;
+ if (SDValue SplatValue =
+ cast<BuildVectorSDNode>(V.getNode())->getSplatValue()) {
+ if (auto *C = dyn_cast<ConstantSDNode>(SplatValue)) {
+ CstSplatValue = C->getAPIntValue();
+ return true;
+ }
+ }
+ return false;
+ };
+
+ // Check for a constant splat rotation amount.
+ APInt CstSplatValue;
+ bool IsCstSplat = checkCstSplat(Amt, CstSplatValue);
+ bool IsROTL = Opcode == ISD::ROTL;
+
+ // Check for splat rotate by zero.
+ if (IsCstSplat && CstSplatValue.urem(EltSizeInBits) == 0)
+ return R;
+
+ // LoongArch targets always prefer ISD::ROTR.
+ if (IsROTL) {
+ SDValue Zero = DAG.getConstant(0, DL, VT);
+ return DAG.getNode(ISD::ROTR, DL, VT, R,
+ DAG.getNode(ISD::SUB, DL, VT, Zero, Amt));
+ }
+
+ // Rotate by an immediate.
+ if (IsCstSplat) {
+ // ISD::ROTR: Attempt to rotate by a positive immediate.
+ SDValue Bits = DAG.getConstant(EltSizeInBits, DL, VT);
+ if (SDValue Urem =
+ DAG.FoldConstantArithmetic(ISD::UREM, DL, VT, {Amt, Bits}))
+ return DAG.getNode(Op.getOpcode(), DL, VT, R, Urem);
+ }
+
+ return Op;
+}
+
// Return true if Val is equal to (setcc LHS, RHS, CC).
// Return false if Val is the inverse of (setcc LHS, RHS, CC).
// Otherwise, return std::nullopt.
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 3c00296116ac2..d782498019914 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -411,6 +411,7 @@ class LoongArchTargetLowering : public TargetLowering {
SDValue lowerVECREDUCE_ADD(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerVECREDUCE(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerConstantFP(SDValue Op, SelectionDAG &DAG) const;
+ SDValue lowerRotate(SDValue Op, SelectionDAG &DAG) const;
bool isFPImmLegal(const APFloat &Imm, EVT VT,
bool ForCodeSize) const override;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
index adfe990ba1234..bd6ab2a789b26 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLASXInstrInfo.td
@@ -1442,6 +1442,11 @@ defm : PatXrXr<sra, "XVSRA">;
defm : PatShiftXrXr<sra, "XVSRA">;
defm : PatShiftXrSplatUimm<sra, "XVSRAI">;
+// XVROTR[I]_{B/H/W/D}
+defm : PatXrXr<rotr, "XVROTR">;
+defm : PatShiftXrXr<rotr, "XVROTR">;
+defm : PatShiftXrSplatUimm<rotr, "XVROTRI">;
+
// XVCLZ_{B/H/W/D}
defm : PatXr<ctlz, "XVCLZ">;
diff --git a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
index 2c36099f8eb71..a159954140fb9 100644
--- a/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchLSXInstrInfo.td
@@ -1633,6 +1633,11 @@ defm : PatVrVr<sra, "VSRA">;
defm : PatShiftVrVr<sra, "VSRA">;
defm : PatShiftVrSplatUimm<sra, "VSRAI">;
+// VROTR[I]_{B/H/W/D}
+defm : PatVrVr<rotr, "VROTR">;
+defm : PatShiftVrVr<rotr, "VROTR">;
+defm : PatShiftVrSplatUimm<rotr, "VROTRI">;
+
// VCLZ_{B/H/W/D}
defm : PatVr<ctlz, "VCLZ">;
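As a side note on how this lowering is reached: a rotate can also be written with the funnel-shift intrinsics, and when both data operands of llvm.fshl/llvm.fshr are the same value the generic SelectionDAG builder should turn the call into ISD::ROTL/ISD::ROTR, which this patch makes Custom for the LSX/LASX vector types. The snippet below is a hand-written sketch (not part of this patch or its tests) of such input IR; with +lsx it should now go through lowerRotate and select a vneg.w plus a single vrotr.w instead of the old sll/srl/or expansion.

; Hypothetical example (not from this patch): rotate-left of each i32 lane
; by a variable amount, expressed as a funnel shift with equal data operands.
define <4 x i32> @rotl_v4i32_fshl(<4 x i32> %x, <4 x i32> %n) {
  %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> %n)
  ret <4 x i32> %r
}

declare <4 x i32> @llvm.fshl.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)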
From e8a821835c84788639a7d8ba6b2c7061fa187eb8 Mon Sep 17 00:00:00 2001
From: yangzhaoxin <yangzhaoxin at loongson.cn>
Date: Mon, 29 Sep 2025 17:10:54 +0800
Subject: [PATCH 2/2] add tests
---
llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll | 107 ++++++------------
llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll | 105 ++++++-----------
2 files changed, 70 insertions(+), 142 deletions(-)
diff --git a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
index f9f024dda973c..6b8ab2cdb94e1 100644
--- a/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
+++ b/llvm/test/CodeGen/LoongArch/lasx/rotl-rotr.ll
@@ -7,11 +7,8 @@ define void @rotl_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
-; CHECK-NEXT: xvrepli.b $xr2, 8
-; CHECK-NEXT: xvsub.b $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsll.b $xr1, $xr0, $xr1
-; CHECK-NEXT: xvsrl.b $xr0, $xr0, $xr2
-; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvneg.b $xr1, $xr1
+; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <32 x i8>, ptr %src
@@ -30,11 +27,7 @@ define void @rotr_v32i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.b $xr1, $a2
-; CHECK-NEXT: xvrepli.b $xr2, 8
-; CHECK-NEXT: xvsub.b $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsrl.b $xr1, $xr0, $xr1
-; CHECK-NEXT: xvsll.b $xr0, $xr0, $xr2
-; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvrotr.b $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <32 x i8>, ptr %src
@@ -52,9 +45,7 @@ define void @rotr_v32i8_imm(ptr %dst, ptr %src) nounwind {
; CHECK-LABEL: rotr_v32i8_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvsrli.b $xr1, $xr0, 2
-; CHECK-NEXT: xvslli.b $xr0, $xr0, 6
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrotri.b $xr0, $xr0, 2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <32 x i8>, ptr %src
@@ -70,11 +61,8 @@ define void @rotl_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
-; CHECK-NEXT: xvrepli.h $xr2, 16
-; CHECK-NEXT: xvsub.h $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsll.h $xr1, $xr0, $xr1
-; CHECK-NEXT: xvsrl.h $xr0, $xr0, $xr2
-; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvneg.h $xr1, $xr1
+; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i16>, ptr %src
@@ -93,11 +81,7 @@ define void @rotr_v16i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.h $xr1, $a2
-; CHECK-NEXT: xvrepli.h $xr2, 16
-; CHECK-NEXT: xvsub.h $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsrl.h $xr1, $xr0, $xr1
-; CHECK-NEXT: xvsll.h $xr0, $xr0, $xr2
-; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvrotr.h $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i16>, ptr %src
@@ -115,9 +99,7 @@ define void @rotr_v16i16_imm(ptr %dst, ptr %src) nounwind {
; CHECK-LABEL: rotr_v16i16_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvsrli.h $xr1, $xr0, 2
-; CHECK-NEXT: xvslli.h $xr0, $xr0, 14
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrotri.h $xr0, $xr0, 2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i16>, ptr %src
@@ -133,11 +115,8 @@ define void @rotl_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
-; CHECK-NEXT: xvrepli.w $xr2, 32
-; CHECK-NEXT: xvsub.w $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsll.w $xr1, $xr0, $xr1
-; CHECK-NEXT: xvsrl.w $xr0, $xr0, $xr2
-; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvneg.w $xr1, $xr1
+; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i32>, ptr %src
@@ -156,11 +135,7 @@ define void @rotr_v8i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
; CHECK-NEXT: xvreplgr2vr.w $xr1, $a2
-; CHECK-NEXT: xvrepli.w $xr2, 32
-; CHECK-NEXT: xvsub.w $xr2, $xr2, $xr1
-; CHECK-NEXT: xvsrl.w $xr1, $xr0, $xr1
-; CHECK-NEXT: xvsll.w $xr0, $xr0, $xr2
-; CHECK-NEXT: xvor.v $xr0, $xr1, $xr0
+; CHECK-NEXT: xvrotr.w $xr0, $xr0, $xr1
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i32>, ptr %src
@@ -178,9 +153,7 @@ define void @rotr_v8i32_imm(ptr %dst, ptr %src) nounwind {
; CHECK-LABEL: rotr_v8i32_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvsrli.w $xr1, $xr0, 2
-; CHECK-NEXT: xvslli.w $xr0, $xr0, 30
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
+; CHECK-NEXT: xvrotri.w $xr0, $xr0, 2
; CHECK-NEXT: xvst $xr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i32>, ptr %src
@@ -196,13 +169,10 @@ define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a1, 0
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: xvreplve0.d $xr1, $xr1
-; LA32-NEXT: xvrepli.d $xr2, 64
-; LA32-NEXT: xvsub.d $xr2, $xr2, $xr1
-; LA32-NEXT: xvsll.d $xr1, $xr0, $xr1
-; LA32-NEXT: xvsrl.d $xr0, $xr0, $xr2
-; LA32-NEXT: xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: xvpermi.q $xr1, $xr1, 2
+; LA32-NEXT: xvneg.d $xr1, $xr1
+; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
; LA32-NEXT: xvst $xr0, $a0, 0
; LA32-NEXT: ret
;
@@ -210,11 +180,8 @@ define void @rotl_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a1, 0
; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
-; LA64-NEXT: xvrepli.d $xr2, 64
-; LA64-NEXT: xvsub.d $xr2, $xr2, $xr1
-; LA64-NEXT: xvsll.d $xr1, $xr0, $xr1
-; LA64-NEXT: xvsrl.d $xr0, $xr0, $xr2
-; LA64-NEXT: xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT: xvneg.d $xr1, $xr1
+; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1
; LA64-NEXT: xvst $xr0, $a0, 0
; LA64-NEXT: ret
%v0 = load <4 x i64>, ptr %src
@@ -233,13 +200,9 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
; LA32: # %bb.0:
; LA32-NEXT: xvld $xr0, $a1, 0
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: xvreplve0.d $xr1, $xr1
-; LA32-NEXT: xvrepli.d $xr2, 64
-; LA32-NEXT: xvsub.d $xr2, $xr2, $xr1
-; LA32-NEXT: xvsrl.d $xr1, $xr0, $xr1
-; LA32-NEXT: xvsll.d $xr0, $xr0, $xr2
-; LA32-NEXT: xvor.v $xr0, $xr1, $xr0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: xvpermi.q $xr1, $xr1, 2
+; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
; LA32-NEXT: xvst $xr0, $a0, 0
; LA32-NEXT: ret
;
@@ -247,11 +210,7 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: xvld $xr0, $a1, 0
; LA64-NEXT: xvreplgr2vr.d $xr1, $a2
-; LA64-NEXT: xvrepli.d $xr2, 64
-; LA64-NEXT: xvsub.d $xr2, $xr2, $xr1
-; LA64-NEXT: xvsrl.d $xr1, $xr0, $xr1
-; LA64-NEXT: xvsll.d $xr0, $xr0, $xr2
-; LA64-NEXT: xvor.v $xr0, $xr1, $xr0
+; LA64-NEXT: xvrotr.d $xr0, $xr0, $xr1
; LA64-NEXT: xvst $xr0, $a0, 0
; LA64-NEXT: ret
%v0 = load <4 x i64>, ptr %src
@@ -266,14 +225,20 @@ define void @rotr_v4i64(ptr %dst, ptr %src, i64 %a0) nounwind {
}
define void @rotr_v4i64_imm(ptr %dst, ptr %src) nounwind {
-; CHECK-LABEL: rotr_v4i64_imm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: xvld $xr0, $a1, 0
-; CHECK-NEXT: xvsrli.d $xr1, $xr0, 2
-; CHECK-NEXT: xvslli.d $xr0, $xr0, 62
-; CHECK-NEXT: xvor.v $xr0, $xr0, $xr1
-; CHECK-NEXT: xvst $xr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: rotr_v4i64_imm:
+; LA32: # %bb.0:
+; LA32-NEXT: xvld $xr0, $a1, 0
+; LA32-NEXT: xvrepli.w $xr1, -62
+; LA32-NEXT: xvrotr.d $xr0, $xr0, $xr1
+; LA32-NEXT: xvst $xr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotr_v4i64_imm:
+; LA64: # %bb.0:
+; LA64-NEXT: xvld $xr0, $a1, 0
+; LA64-NEXT: xvrotri.d $xr0, $xr0, 2
+; LA64-NEXT: xvst $xr0, $a0, 0
+; LA64-NEXT: ret
%v0 = load <4 x i64>, ptr %src
%b = lshr <4 x i64> %v0, splat (i64 2)
%c = shl <4 x i64> %v0, splat (i64 62)
diff --git a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
index 79e74f35abafb..106a7b0e3f0a5 100644
--- a/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
+++ b/llvm/test/CodeGen/LoongArch/lsx/rotl-rotr.ll
@@ -7,11 +7,8 @@ define void @rotl_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
-; CHECK-NEXT: vrepli.b $vr2, 8
-; CHECK-NEXT: vsub.b $vr2, $vr2, $vr1
-; CHECK-NEXT: vsll.b $vr1, $vr0, $vr1
-; CHECK-NEXT: vsrl.b $vr0, $vr0, $vr2
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vneg.b $vr1, $vr1
+; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i8>, ptr %src
@@ -30,11 +27,7 @@ define void @rotr_v16i8(ptr %dst, ptr %src, i8 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.b $vr1, $a2
-; CHECK-NEXT: vrepli.b $vr2, 8
-; CHECK-NEXT: vsub.b $vr2, $vr2, $vr1
-; CHECK-NEXT: vsrl.b $vr1, $vr0, $vr1
-; CHECK-NEXT: vsll.b $vr0, $vr0, $vr2
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vrotr.b $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i8>, ptr %src
@@ -52,9 +45,7 @@ define void @rotr_v16i8_imm(ptr %dst, ptr %src) nounwind {
; CHECK-LABEL: rotr_v16i8_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vsrli.b $vr1, $vr0, 2
-; CHECK-NEXT: vslli.b $vr0, $vr0, 6
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrotri.b $vr0, $vr0, 2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <16 x i8>, ptr %src
@@ -70,11 +61,8 @@ define void @rotl_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
-; CHECK-NEXT: vrepli.h $vr2, 16
-; CHECK-NEXT: vsub.h $vr2, $vr2, $vr1
-; CHECK-NEXT: vsll.h $vr1, $vr0, $vr1
-; CHECK-NEXT: vsrl.h $vr0, $vr0, $vr2
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vneg.h $vr1, $vr1
+; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i16>, ptr %src
@@ -93,11 +81,7 @@ define void @rotr_v8i16(ptr %dst, ptr %src, i16 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.h $vr1, $a2
-; CHECK-NEXT: vrepli.h $vr2, 16
-; CHECK-NEXT: vsub.h $vr2, $vr2, $vr1
-; CHECK-NEXT: vsrl.h $vr1, $vr0, $vr1
-; CHECK-NEXT: vsll.h $vr0, $vr0, $vr2
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vrotr.h $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i16>, ptr %src
@@ -115,9 +99,7 @@ define void @rotr_v8i16_imm(ptr %dst, ptr %src) nounwind {
; CHECK-LABEL: rotr_v8i16_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vsrli.h $vr1, $vr0, 2
-; CHECK-NEXT: vslli.h $vr0, $vr0, 14
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrotri.h $vr0, $vr0, 2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <8 x i16>, ptr %src
@@ -133,11 +115,8 @@ define void @rotl_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
-; CHECK-NEXT: vrepli.w $vr2, 32
-; CHECK-NEXT: vsub.w $vr2, $vr2, $vr1
-; CHECK-NEXT: vsll.w $vr1, $vr0, $vr1
-; CHECK-NEXT: vsrl.w $vr0, $vr0, $vr2
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vneg.w $vr1, $vr1
+; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i32>, ptr %src
@@ -156,11 +135,7 @@ define void @rotr_v4i32(ptr %dst, ptr %src, i32 signext %a0) nounwind {
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
; CHECK-NEXT: vreplgr2vr.w $vr1, $a2
-; CHECK-NEXT: vrepli.w $vr2, 32
-; CHECK-NEXT: vsub.w $vr2, $vr2, $vr1
-; CHECK-NEXT: vsrl.w $vr1, $vr0, $vr1
-; CHECK-NEXT: vsll.w $vr0, $vr0, $vr2
-; CHECK-NEXT: vor.v $vr0, $vr1, $vr0
+; CHECK-NEXT: vrotr.w $vr0, $vr0, $vr1
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i32>, ptr %src
@@ -178,9 +153,7 @@ define void @rotr_v4i32_imm(ptr %dst, ptr %src) nounwind {
; CHECK-LABEL: rotr_v4i32_imm:
; CHECK: # %bb.0:
; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vsrli.w $vr1, $vr0, 2
-; CHECK-NEXT: vslli.w $vr0, $vr0, 30
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
+; CHECK-NEXT: vrotri.w $vr0, $vr0, 2
; CHECK-NEXT: vst $vr0, $a0, 0
; CHECK-NEXT: ret
%v0 = load <4 x i32>, ptr %src
@@ -196,13 +169,9 @@ define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
; LA32: # %bb.0:
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: vreplvei.d $vr1, $vr1, 0
-; LA32-NEXT: vrepli.d $vr2, 64
-; LA32-NEXT: vsub.d $vr2, $vr2, $vr1
-; LA32-NEXT: vsll.d $vr1, $vr0, $vr1
-; LA32-NEXT: vsrl.d $vr0, $vr0, $vr2
-; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: vneg.d $vr1, $vr1
+; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1
; LA32-NEXT: vst $vr0, $a0, 0
; LA32-NEXT: ret
;
@@ -210,11 +179,8 @@ define void @rotl_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: vld $vr0, $a1, 0
; LA64-NEXT: vreplgr2vr.d $vr1, $a2
-; LA64-NEXT: vrepli.d $vr2, 64
-; LA64-NEXT: vsub.d $vr2, $vr2, $vr1
-; LA64-NEXT: vsll.d $vr1, $vr0, $vr1
-; LA64-NEXT: vsrl.d $vr0, $vr0, $vr2
-; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vneg.d $vr1, $vr1
+; LA64-NEXT: vrotr.d $vr0, $vr0, $vr1
; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
%v0 = load <2 x i64>, ptr %src
@@ -233,13 +199,8 @@ define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
; LA32: # %bb.0:
; LA32-NEXT: vld $vr0, $a1, 0
; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 0
-; LA32-NEXT: vinsgr2vr.w $vr1, $a3, 1
-; LA32-NEXT: vreplvei.d $vr1, $vr1, 0
-; LA32-NEXT: vrepli.d $vr2, 64
-; LA32-NEXT: vsub.d $vr2, $vr2, $vr1
-; LA32-NEXT: vsrl.d $vr1, $vr0, $vr1
-; LA32-NEXT: vsll.d $vr0, $vr0, $vr2
-; LA32-NEXT: vor.v $vr0, $vr1, $vr0
+; LA32-NEXT: vinsgr2vr.w $vr1, $a2, 2
+; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1
; LA32-NEXT: vst $vr0, $a0, 0
; LA32-NEXT: ret
;
@@ -247,11 +208,7 @@ define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
; LA64: # %bb.0:
; LA64-NEXT: vld $vr0, $a1, 0
; LA64-NEXT: vreplgr2vr.d $vr1, $a2
-; LA64-NEXT: vrepli.d $vr2, 64
-; LA64-NEXT: vsub.d $vr2, $vr2, $vr1
-; LA64-NEXT: vsrl.d $vr1, $vr0, $vr1
-; LA64-NEXT: vsll.d $vr0, $vr0, $vr2
-; LA64-NEXT: vor.v $vr0, $vr1, $vr0
+; LA64-NEXT: vrotr.d $vr0, $vr0, $vr1
; LA64-NEXT: vst $vr0, $a0, 0
; LA64-NEXT: ret
%v0 = load <2 x i64>, ptr %src
@@ -266,14 +223,20 @@ define void @rotr_v2i64(ptr %dst, ptr %src, i64 %a0) nounwind {
}
define void @rotr_v2i64_imm(ptr %dst, ptr %src) nounwind {
-; CHECK-LABEL: rotr_v2i64_imm:
-; CHECK: # %bb.0:
-; CHECK-NEXT: vld $vr0, $a1, 0
-; CHECK-NEXT: vsrli.d $vr1, $vr0, 2
-; CHECK-NEXT: vslli.d $vr0, $vr0, 62
-; CHECK-NEXT: vor.v $vr0, $vr0, $vr1
-; CHECK-NEXT: vst $vr0, $a0, 0
-; CHECK-NEXT: ret
+; LA32-LABEL: rotr_v2i64_imm:
+; LA32: # %bb.0:
+; LA32-NEXT: vld $vr0, $a1, 0
+; LA32-NEXT: vrepli.w $vr1, -62
+; LA32-NEXT: vrotr.d $vr0, $vr0, $vr1
+; LA32-NEXT: vst $vr0, $a0, 0
+; LA32-NEXT: ret
+;
+; LA64-LABEL: rotr_v2i64_imm:
+; LA64: # %bb.0:
+; LA64-NEXT: vld $vr0, $a1, 0
+; LA64-NEXT: vrotri.d $vr0, $vr0, 2
+; LA64-NEXT: vst $vr0, $a0, 0
+; LA64-NEXT: ret
%v0 = load <2 x i64>, ptr %src
%b = lshr <2 x i64> %v0, splat (i64 2)
%c = shl <2 x i64> %v0, splat (i64 62)
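For the constant-splat case, lowerRotate reduces the rotation amount modulo the element width, so the immediate form should be selected. The sketch below is hand-written (not one of the tests above) and assumes the splat constant syntax already used in these tests; with +lsx it would be expected to select a single vrotri.w.

; Hypothetical example (not from this patch): rotate-right of each i32 lane
; by the constant 2, expressed as a funnel shift with equal data operands.
define <4 x i32> @rotr_v4i32_imm_fshr(<4 x i32> %x) {
  %r = call <4 x i32> @llvm.fshr.v4i32(<4 x i32> %x, <4 x i32> %x, <4 x i32> splat (i32 2))
  ret <4 x i32> %r
}

declare <4 x i32> @llvm.fshr.v4i32(<4 x i32>, <4 x i32>, <4 x i32>)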
More information about the llvm-branch-commits
mailing list