[llvm] cf75ef4 - [LoongArch] Add codegen support for ISD::ROTL and ISD::ROTR
Weining Lu via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 9 04:42:55 PDT 2022
Author: gonglingqin
Date: 2022-08-09T19:39:17+08:00
New Revision: cf75ef460c92304b3122a7734cc54de7ff398d2b
URL: https://github.com/llvm/llvm-project/commit/cf75ef460c92304b3122a7734cc54de7ff398d2b
DIFF: https://github.com/llvm/llvm-project/commit/cf75ef460c92304b3122a7734cc54de7ff398d2b.diff
LOG: [LoongArch] Add codegen support for ISD::ROTL and ISD::ROTR
Differential Revision: https://reviews.llvm.org/D131231
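For readers skimming the patch: ROTL is expanded to ROTR on the native GRLen type (LoongArch only provides rotate-right instructions), the 32-bit forms on LA64 are custom-lowered through the new ROTL_W/ROTR_W nodes, and a constant left-rotate amount is rewritten as a right rotate by 32 minus that amount via the ImmSubFrom32 transform. The following is a minimal standalone C++ sketch of that rotate identity, written for this note and not taken from the patch itself:

    // Illustration (not part of the patch) of the identity the ImmSubFrom32
    // transform relies on: rotl(x, n) == rotr(x, 32 - n) for 32-bit values,
    // with rotate amounts taken modulo the bit width.
    #include <cassert>
    #include <cstdint>

    static uint32_t rotl32(uint32_t x, unsigned n) {
      n &= 31; // amounts are modulo 32
      return n == 0 ? x : (x << n) | (x >> (32 - n));
    }

    static uint32_t rotr32(uint32_t x, unsigned n) {
      n &= 31;
      return n == 0 ? x : (x >> n) | (x << (32 - n));
    }

    int main() {
      for (unsigned n = 0; n < 32; ++n)
        // A left rotate by n equals a right rotate by (32 - n) mod 32.
        assert(rotl32(0x12345678u, n) == rotr32(0x12345678u, (32 - n) & 31));
      return 0;
    }

This is why the TableGen patterns below can select ROTRI_W/ROTRI_D for both rotr-by-immediate and rotl-by-immediate inputs.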
Added:
llvm/test/CodeGen/LoongArch/rotl-rotr.ll
Modified:
llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
llvm/lib/Target/LoongArch/LoongArchISelLowering.h
llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index a4641e61ae83..7247dbbd8b0d 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -52,6 +52,7 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRA_PARTS, GRLenVT, Custom);
setOperationAction(ISD::SRL_PARTS, GRLenVT, Custom);
setOperationAction(ISD::FP_TO_SINT, GRLenVT, Custom);
+ setOperationAction(ISD::ROTL, GRLenVT, Expand);
setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, GRLenVT, Custom);
@@ -65,6 +66,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
setOperationAction(ISD::SRL, MVT::i32, Custom);
setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::BITCAST, MVT::i32, Custom);
+ setOperationAction(ISD::ROTR, MVT::i32, Custom);
+ setOperationAction(ISD::ROTL, MVT::i32, Custom);
if (Subtarget.hasBasicF() && !Subtarget.hasBasicD())
setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
}
@@ -363,6 +366,10 @@ static LoongArchISD::NodeType getLoongArchWOpcode(unsigned Opcode) {
return LoongArchISD::SRA_W;
case ISD::SRL:
return LoongArchISD::SRL_W;
+ case ISD::ROTR:
+ return LoongArchISD::ROTR_W;
+ case ISD::ROTL:
+ return LoongArchISD::ROTL_W;
}
}
@@ -391,6 +398,7 @@ void LoongArchTargetLowering::ReplaceNodeResults(
case ISD::SHL:
case ISD::SRA:
case ISD::SRL:
+ case ISD::ROTR:
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
if (N->getOperand(1).getOpcode() != ISD::Constant) {
@@ -398,6 +406,13 @@ void LoongArchTargetLowering::ReplaceNodeResults(
break;
}
break;
+ case ISD::ROTL:
+ ConstantSDNode *CN;
+ if ((CN = dyn_cast<ConstantSDNode>(N->getOperand(1)))) {
+ Results.push_back(customLegalizeToWOp(N, DAG));
+ break;
+ }
+ break;
case ISD::FP_TO_SINT: {
assert(N->getValueType(0) == MVT::i32 && Subtarget.is64Bit() &&
"Unexpected custom legalisation");
@@ -882,6 +897,8 @@ const char *LoongArchTargetLowering::getTargetNodeName(unsigned Opcode) const {
NODE_NAME_CASE(FTINT)
NODE_NAME_CASE(REVB_2H)
NODE_NAME_CASE(REVB_2W)
+ NODE_NAME_CASE(ROTR_W)
+ NODE_NAME_CASE(ROTL_W)
}
#undef NODE_NAME_CASE
return nullptr;
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
index 9ab567713c2f..dd79ab2237dc 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.h
@@ -35,6 +35,9 @@ enum NodeType : unsigned {
SRA_W,
SRL_W,
+ ROTL_W,
+ ROTR_W,
+
// FPR<->GPR transfer operations
MOVGR2FR_W_LA64,
MOVFR2GR_S_LA64,
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 3be06614871f..19ad28b1dc93 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -51,6 +51,8 @@ def loongarch_ret : SDNode<"LoongArchISD::RET", SDTNone,
def loongarch_sll_w : SDNode<"LoongArchISD::SLL_W", SDT_LoongArchIntBinOpW>;
def loongarch_sra_w : SDNode<"LoongArchISD::SRA_W", SDT_LoongArchIntBinOpW>;
def loongarch_srl_w : SDNode<"LoongArchISD::SRL_W", SDT_LoongArchIntBinOpW>;
+def loongarch_rotr_w : SDNode<"LoongArchISD::ROTR_W", SDT_LoongArchIntBinOpW>;
+def loongarch_rotl_w : SDNode<"LoongArchISD::ROTL_W", SDT_LoongArchIntBinOpW>;
def loongarch_bstrins
: SDNode<"LoongArchISD::BSTRINS", SDT_LoongArchBStrIns>;
def loongarch_bstrpick
@@ -178,6 +180,12 @@ def fpimm0 : PatLeaf<(fpimm), [{return N->isExactlyValue(+0.0);}]>;
def fpimm0neg : PatLeaf<(fpimm), [{return N->isExactlyValue(-0.0);}]>;
def fpimm1 : PatLeaf<(fpimm), [{return N->isExactlyValue(+1.0);}]>;
+// Return an immediate subtracted from 32.
+def ImmSubFrom32 : SDNodeXForm<imm, [{
+ return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N),
+ N->getValueType(0));
+}]>;
+
def CallSymbol: AsmOperandClass {
let Name = "CallSymbol";
let RenderMethod = "addImmOperands";
@@ -626,6 +634,8 @@ def : PatGprGpr<urem, MOD_WU>;
def : PatGprGpr<mul, MUL_W>;
def : PatGprGpr<mulhs, MULH_W>;
def : PatGprGpr<mulhu, MULH_WU>;
+def : PatGprGpr<rotr, ROTR_W>;
+def : PatGprImm<rotr, ROTRI_W, uimm5>;
} // Predicates = [IsLA32]
let Predicates = [IsLA64] in {
@@ -639,6 +649,14 @@ def : PatGprGpr<sdiv, DIV_D>;
def : PatGprGpr<udiv, DIV_DU>;
def : PatGprGpr<srem, MOD_D>;
def : PatGprGpr<urem, MOD_DU>;
+def : PatGprGpr<rotr, ROTR_D>;
+def : PatGprGpr<loongarch_rotr_w, ROTR_W>;
+def : PatGprImm<rotr, ROTRI_D, uimm6>;
+def : PatGprImm_32<rotr, ROTRI_W, uimm5>;
+def : Pat<(loongarch_rotl_w GPR:$rj, uimm5:$imm),
+ (ROTRI_W GPR:$rj, (ImmSubFrom32 uimm5:$imm))>;
+def : Pat<(sext_inreg (loongarch_rotl_w GPR:$rj, uimm5:$imm), i32),
+ (ROTRI_W GPR:$rj, (ImmSubFrom32 uimm5:$imm))>;
// TODO: Select "_W[U]" instructions for i32xi32 if only lower 32 bits of the
// product are used.
def : PatGprGpr<mul, MUL_D>;
diff --git a/llvm/test/CodeGen/LoongArch/rotl-rotr.ll b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
new file mode 100644
index 000000000000..67559b134cbd
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/rotl-rotr.ll
@@ -0,0 +1,629 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+;; TODO: Add optimization to ISD::ROTL
+
+define i32 @rotl_32(i32 %x, i32 %y) nounwind {
+; LA32-LABEL: rotl_32:
+; LA32: # %bb.0:
+; LA32-NEXT: ori $a2, $zero, 32
+; LA32-NEXT: sub.w $a1, $a2, $a1
+; LA32-NEXT: rotr.w $a0, $a0, $a1
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_32:
+; LA64: # %bb.0:
+; LA64-NEXT: sub.d $a2, $zero, $a1
+; LA64-NEXT: sll.w $a1, $a0, $a1
+; LA64-NEXT: srl.w $a0, $a0, $a2
+; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: jirl $zero, $ra, 0
+ %z = sub i32 32, %y
+ %b = shl i32 %x, %y
+ %c = lshr i32 %x, %z
+ %d = or i32 %b, %c
+ ret i32 %d
+}
+
+define i32 @rotr_32(i32 %x, i32 %y) nounwind {
+; LA32-LABEL: rotr_32:
+; LA32: # %bb.0:
+; LA32-NEXT: rotr.w $a0, $a0, $a1
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_32:
+; LA64: # %bb.0:
+; LA64-NEXT: rotr.w $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %z = sub i32 32, %y
+ %b = lshr i32 %x, %y
+ %c = shl i32 %x, %z
+ %d = or i32 %b, %c
+ ret i32 %d
+}
+
+define i64 @rotl_64(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotl_64:
+; LA32: # %bb.0:
+; LA32-NEXT: xori $a3, $a2, 31
+; LA32-NEXT: srli.w $a4, $a0, 1
+; LA32-NEXT: srl.w $a3, $a4, $a3
+; LA32-NEXT: sll.w $a4, $a1, $a2
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: addi.w $a4, $a2, -32
+; LA32-NEXT: slti $a5, $a4, 0
+; LA32-NEXT: maskeqz $a3, $a3, $a5
+; LA32-NEXT: sll.w $a6, $a0, $a4
+; LA32-NEXT: masknez $a5, $a6, $a5
+; LA32-NEXT: or $a3, $a3, $a5
+; LA32-NEXT: ori $a5, $zero, 64
+; LA32-NEXT: sub.w $a5, $a5, $a2
+; LA32-NEXT: xori $a5, $a5, 31
+; LA32-NEXT: slli.w $a6, $a1, 1
+; LA32-NEXT: sll.w $a5, $a6, $a5
+; LA32-NEXT: sub.w $a6, $zero, $a2
+; LA32-NEXT: srl.w $a7, $a1, $a6
+; LA32-NEXT: ori $a1, $zero, 32
+; LA32-NEXT: sub.w $t0, $a1, $a2
+; LA32-NEXT: srai.w $a1, $t0, 31
+; LA32-NEXT: and $a1, $a1, $a7
+; LA32-NEXT: or $a1, $a3, $a1
+; LA32-NEXT: srl.w $a3, $a0, $a6
+; LA32-NEXT: or $a3, $a3, $a5
+; LA32-NEXT: slti $a5, $t0, 0
+; LA32-NEXT: masknez $a6, $a7, $a5
+; LA32-NEXT: maskeqz $a3, $a3, $a5
+; LA32-NEXT: or $a3, $a3, $a6
+; LA32-NEXT: sll.w $a0, $a0, $a2
+; LA32-NEXT: srai.w $a2, $a4, 31
+; LA32-NEXT: and $a0, $a2, $a0
+; LA32-NEXT: or $a0, $a0, $a3
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_64:
+; LA64: # %bb.0:
+; LA64-NEXT: ori $a2, $zero, 64
+; LA64-NEXT: sub.d $a1, $a2, $a1
+; LA64-NEXT: rotr.d $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %z = sub i64 64, %y
+ %b = shl i64 %x, %y
+ %c = lshr i64 %x, %z
+ %d = or i64 %b, %c
+ ret i64 %d
+}
+
+define i64 @rotr_64(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotr_64:
+; LA32: # %bb.0:
+; LA32-NEXT: xori $a3, $a2, 31
+; LA32-NEXT: slli.w $a4, $a1, 1
+; LA32-NEXT: sll.w $a3, $a4, $a3
+; LA32-NEXT: srl.w $a4, $a0, $a2
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: addi.w $a4, $a2, -32
+; LA32-NEXT: slti $a5, $a4, 0
+; LA32-NEXT: maskeqz $a3, $a3, $a5
+; LA32-NEXT: srl.w $a6, $a1, $a4
+; LA32-NEXT: masknez $a5, $a6, $a5
+; LA32-NEXT: or $a3, $a3, $a5
+; LA32-NEXT: ori $a5, $zero, 64
+; LA32-NEXT: sub.w $a5, $a5, $a2
+; LA32-NEXT: xori $a5, $a5, 31
+; LA32-NEXT: srli.w $a6, $a0, 1
+; LA32-NEXT: srl.w $a5, $a6, $a5
+; LA32-NEXT: sub.w $a6, $zero, $a2
+; LA32-NEXT: sll.w $a7, $a0, $a6
+; LA32-NEXT: ori $a0, $zero, 32
+; LA32-NEXT: sub.w $t0, $a0, $a2
+; LA32-NEXT: srai.w $a0, $t0, 31
+; LA32-NEXT: and $a0, $a0, $a7
+; LA32-NEXT: or $a0, $a3, $a0
+; LA32-NEXT: sll.w $a3, $a1, $a6
+; LA32-NEXT: or $a3, $a3, $a5
+; LA32-NEXT: slti $a5, $t0, 0
+; LA32-NEXT: masknez $a6, $a7, $a5
+; LA32-NEXT: maskeqz $a3, $a3, $a5
+; LA32-NEXT: or $a3, $a3, $a6
+; LA32-NEXT: srl.w $a1, $a1, $a2
+; LA32-NEXT: srai.w $a2, $a4, 31
+; LA32-NEXT: and $a1, $a2, $a1
+; LA32-NEXT: or $a1, $a1, $a3
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_64:
+; LA64: # %bb.0:
+; LA64-NEXT: rotr.d $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %z = sub i64 64, %y
+ %b = lshr i64 %x, %y
+ %c = shl i64 %x, %z
+ %d = or i64 %b, %c
+ ret i64 %d
+}
+
+define i32 @rotl_32_mask(i32 %x, i32 %y) nounwind {
+; LA32-LABEL: rotl_32_mask:
+; LA32: # %bb.0:
+; LA32-NEXT: sub.w $a1, $zero, $a1
+; LA32-NEXT: rotr.w $a0, $a0, $a1
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_32_mask:
+; LA64: # %bb.0:
+; LA64-NEXT: sub.d $a2, $zero, $a1
+; LA64-NEXT: sll.w $a1, $a0, $a1
+; LA64-NEXT: srl.w $a0, $a0, $a2
+; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: jirl $zero, $ra, 0
+ %z = sub i32 0, %y
+ %and = and i32 %z, 31
+ %b = shl i32 %x, %y
+ %c = lshr i32 %x, %and
+ %d = or i32 %b, %c
+ ret i32 %d
+}
+
+define i32 @rotl_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
+; LA32-LABEL: rotl_32_mask_and_63_and_31:
+; LA32: # %bb.0:
+; LA32-NEXT: sub.w $a1, $zero, $a1
+; LA32-NEXT: rotr.w $a0, $a0, $a1
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_32_mask_and_63_and_31:
+; LA64: # %bb.0:
+; LA64-NEXT: sub.d $a2, $zero, $a1
+; LA64-NEXT: sll.w $a1, $a0, $a1
+; LA64-NEXT: srl.w $a0, $a0, $a2
+; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: jirl $zero, $ra, 0
+ %a = and i32 %y, 63
+ %b = shl i32 %x, %a
+ %c = sub i32 0, %y
+ %d = and i32 %c, 31
+ %e = lshr i32 %x, %d
+ %f = or i32 %b, %e
+ ret i32 %f
+}
+
+define i32 @rotl_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
+; LA32-LABEL: rotl_32_mask_or_64_or_32:
+; LA32: # %bb.0:
+; LA32-NEXT: sub.w $a1, $zero, $a1
+; LA32-NEXT: rotr.w $a0, $a0, $a1
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_32_mask_or_64_or_32:
+; LA64: # %bb.0:
+; LA64-NEXT: sub.d $a2, $zero, $a1
+; LA64-NEXT: sll.w $a1, $a0, $a1
+; LA64-NEXT: srl.w $a0, $a0, $a2
+; LA64-NEXT: or $a0, $a1, $a0
+; LA64-NEXT: jirl $zero, $ra, 0
+ %a = or i32 %y, 64
+ %b = shl i32 %x, %a
+ %c = sub i32 0, %y
+ %d = or i32 %c, 32
+ %e = lshr i32 %x, %d
+ %f = or i32 %b, %e
+ ret i32 %f
+}
+
+define i32 @rotr_32_mask(i32 %x, i32 %y) nounwind {
+; LA32-LABEL: rotr_32_mask:
+; LA32: # %bb.0:
+; LA32-NEXT: rotr.w $a0, $a0, $a1
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_32_mask:
+; LA64: # %bb.0:
+; LA64-NEXT: rotr.w $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %z = sub i32 0, %y
+ %and = and i32 %z, 31
+ %b = lshr i32 %x, %y
+ %c = shl i32 %x, %and
+ %d = or i32 %b, %c
+ ret i32 %d
+}
+
+define i32 @rotr_32_mask_and_63_and_31(i32 %x, i32 %y) nounwind {
+; LA32-LABEL: rotr_32_mask_and_63_and_31:
+; LA32: # %bb.0:
+; LA32-NEXT: rotr.w $a0, $a0, $a1
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_32_mask_and_63_and_31:
+; LA64: # %bb.0:
+; LA64-NEXT: rotr.w $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %a = and i32 %y, 63
+ %b = lshr i32 %x, %a
+ %c = sub i32 0, %y
+ %d = and i32 %c, 31
+ %e = shl i32 %x, %d
+ %f = or i32 %b, %e
+ ret i32 %f
+}
+
+define i32 @rotr_32_mask_or_64_or_32(i32 %x, i32 %y) nounwind {
+; LA32-LABEL: rotr_32_mask_or_64_or_32:
+; LA32: # %bb.0:
+; LA32-NEXT: rotr.w $a0, $a0, $a1
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_32_mask_or_64_or_32:
+; LA64: # %bb.0:
+; LA64-NEXT: rotr.w $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %a = or i32 %y, 64
+ %b = lshr i32 %x, %a
+ %c = sub i32 0, %y
+ %d = or i32 %c, 32
+ %e = shl i32 %x, %d
+ %f = or i32 %b, %e
+ ret i32 %f
+}
+
+define i64 @rotl_64_mask(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotl_64_mask:
+; LA32: # %bb.0:
+; LA32-NEXT: xori $a3, $a2, 31
+; LA32-NEXT: srli.w $a4, $a0, 1
+; LA32-NEXT: srl.w $a3, $a4, $a3
+; LA32-NEXT: sll.w $a4, $a1, $a2
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: sub.w $a4, $zero, $a2
+; LA32-NEXT: srl.w $a5, $a1, $a4
+; LA32-NEXT: andi $a6, $a4, 63
+; LA32-NEXT: addi.w $a7, $a6, -32
+; LA32-NEXT: srai.w $t0, $a7, 31
+; LA32-NEXT: and $a5, $t0, $a5
+; LA32-NEXT: addi.w $t0, $a2, -32
+; LA32-NEXT: slti $t1, $t0, 0
+; LA32-NEXT: maskeqz $a3, $a3, $t1
+; LA32-NEXT: sll.w $t2, $a0, $t0
+; LA32-NEXT: masknez $t1, $t2, $t1
+; LA32-NEXT: or $a3, $a3, $t1
+; LA32-NEXT: xori $a6, $a6, 31
+; LA32-NEXT: slli.w $t1, $a1, 1
+; LA32-NEXT: sll.w $a6, $t1, $a6
+; LA32-NEXT: or $a3, $a3, $a5
+; LA32-NEXT: srl.w $a4, $a0, $a4
+; LA32-NEXT: or $a4, $a4, $a6
+; LA32-NEXT: srl.w $a1, $a1, $a7
+; LA32-NEXT: slti $a5, $a7, 0
+; LA32-NEXT: masknez $a1, $a1, $a5
+; LA32-NEXT: maskeqz $a4, $a4, $a5
+; LA32-NEXT: or $a1, $a4, $a1
+; LA32-NEXT: sll.w $a0, $a0, $a2
+; LA32-NEXT: srai.w $a2, $t0, 31
+; LA32-NEXT: and $a0, $a2, $a0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: move $a1, $a3
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_64_mask:
+; LA64: # %bb.0:
+; LA64-NEXT: sub.d $a1, $zero, $a1
+; LA64-NEXT: rotr.d $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %z = sub i64 0, %y
+ %and = and i64 %z, 63
+ %b = shl i64 %x, %y
+ %c = lshr i64 %x, %and
+ %d = or i64 %b, %c
+ ret i64 %d
+}
+
+define i64 @rotl_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotl_64_mask_and_127_and_63:
+; LA32: # %bb.0:
+; LA32-NEXT: srli.w $a3, $a0, 1
+; LA32-NEXT: andi $a4, $a2, 127
+; LA32-NEXT: xori $a5, $a4, 31
+; LA32-NEXT: srl.w $a3, $a3, $a5
+; LA32-NEXT: sll.w $a5, $a1, $a2
+; LA32-NEXT: or $a3, $a5, $a3
+; LA32-NEXT: sub.w $a5, $zero, $a2
+; LA32-NEXT: srl.w $a6, $a1, $a5
+; LA32-NEXT: andi $a7, $a5, 63
+; LA32-NEXT: addi.w $t0, $a7, -32
+; LA32-NEXT: srai.w $t1, $t0, 31
+; LA32-NEXT: and $a6, $t1, $a6
+; LA32-NEXT: addi.w $a4, $a4, -32
+; LA32-NEXT: slti $t1, $a4, 0
+; LA32-NEXT: maskeqz $a3, $a3, $t1
+; LA32-NEXT: sll.w $t2, $a0, $a4
+; LA32-NEXT: masknez $t1, $t2, $t1
+; LA32-NEXT: or $a3, $a3, $t1
+; LA32-NEXT: xori $a7, $a7, 31
+; LA32-NEXT: slli.w $t1, $a1, 1
+; LA32-NEXT: sll.w $a7, $t1, $a7
+; LA32-NEXT: or $a3, $a3, $a6
+; LA32-NEXT: srl.w $a5, $a0, $a5
+; LA32-NEXT: or $a5, $a5, $a7
+; LA32-NEXT: srl.w $a1, $a1, $t0
+; LA32-NEXT: slti $a6, $t0, 0
+; LA32-NEXT: masknez $a1, $a1, $a6
+; LA32-NEXT: maskeqz $a5, $a5, $a6
+; LA32-NEXT: or $a1, $a5, $a1
+; LA32-NEXT: sll.w $a0, $a0, $a2
+; LA32-NEXT: srai.w $a2, $a4, 31
+; LA32-NEXT: and $a0, $a2, $a0
+; LA32-NEXT: or $a0, $a0, $a1
+; LA32-NEXT: move $a1, $a3
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_64_mask_and_127_and_63:
+; LA64: # %bb.0:
+; LA64-NEXT: sub.d $a1, $zero, $a1
+; LA64-NEXT: rotr.d $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %a = and i64 %y, 127
+ %b = shl i64 %x, %a
+ %c = sub i64 0, %y
+ %d = and i64 %c, 63
+ %e = lshr i64 %x, %d
+ %f = or i64 %b, %e
+ ret i64 %f
+}
+
+define i64 @rotl_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotl_64_mask_or_128_or_64:
+; LA32: # %bb.0:
+; LA32-NEXT: sll.w $a3, $a0, $a2
+; LA32-NEXT: sub.w $a0, $zero, $a2
+; LA32-NEXT: srl.w $a0, $a1, $a0
+; LA32-NEXT: move $a1, $a3
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_64_mask_or_128_or_64:
+; LA64: # %bb.0:
+; LA64-NEXT: sub.d $a1, $zero, $a1
+; LA64-NEXT: rotr.d $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %a = or i64 %y, 128
+ %b = shl i64 %x, %a
+ %c = sub i64 0, %y
+ %d = or i64 %c, 64
+ %e = lshr i64 %x, %d
+ %f = or i64 %b, %e
+ ret i64 %f
+}
+
+define i64 @rotr_64_mask(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotr_64_mask:
+; LA32: # %bb.0:
+; LA32-NEXT: xori $a3, $a2, 31
+; LA32-NEXT: slli.w $a4, $a1, 1
+; LA32-NEXT: sll.w $a3, $a4, $a3
+; LA32-NEXT: srl.w $a4, $a0, $a2
+; LA32-NEXT: or $a3, $a4, $a3
+; LA32-NEXT: sub.w $a4, $zero, $a2
+; LA32-NEXT: sll.w $a5, $a0, $a4
+; LA32-NEXT: andi $a6, $a4, 63
+; LA32-NEXT: addi.w $a7, $a6, -32
+; LA32-NEXT: srai.w $t0, $a7, 31
+; LA32-NEXT: and $a5, $t0, $a5
+; LA32-NEXT: addi.w $t0, $a2, -32
+; LA32-NEXT: slti $t1, $t0, 0
+; LA32-NEXT: maskeqz $a3, $a3, $t1
+; LA32-NEXT: srl.w $t2, $a1, $t0
+; LA32-NEXT: masknez $t1, $t2, $t1
+; LA32-NEXT: or $a3, $a3, $t1
+; LA32-NEXT: xori $a6, $a6, 31
+; LA32-NEXT: srli.w $t1, $a0, 1
+; LA32-NEXT: srl.w $a6, $t1, $a6
+; LA32-NEXT: or $a3, $a3, $a5
+; LA32-NEXT: sll.w $a4, $a1, $a4
+; LA32-NEXT: or $a4, $a4, $a6
+; LA32-NEXT: sll.w $a0, $a0, $a7
+; LA32-NEXT: slti $a5, $a7, 0
+; LA32-NEXT: masknez $a0, $a0, $a5
+; LA32-NEXT: maskeqz $a4, $a4, $a5
+; LA32-NEXT: or $a0, $a4, $a0
+; LA32-NEXT: srl.w $a1, $a1, $a2
+; LA32-NEXT: srai.w $a2, $t0, 31
+; LA32-NEXT: and $a1, $a2, $a1
+; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: move $a0, $a3
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_64_mask:
+; LA64: # %bb.0:
+; LA64-NEXT: rotr.d $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %z = sub i64 0, %y
+ %and = and i64 %z, 63
+ %b = lshr i64 %x, %y
+ %c = shl i64 %x, %and
+ %d = or i64 %b, %c
+ ret i64 %d
+}
+
+define i64 @rotr_64_mask_and_127_and_63(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotr_64_mask_and_127_and_63:
+; LA32: # %bb.0:
+; LA32-NEXT: slli.w $a3, $a1, 1
+; LA32-NEXT: andi $a4, $a2, 127
+; LA32-NEXT: xori $a5, $a4, 31
+; LA32-NEXT: sll.w $a3, $a3, $a5
+; LA32-NEXT: srl.w $a5, $a0, $a2
+; LA32-NEXT: or $a3, $a5, $a3
+; LA32-NEXT: sub.w $a5, $zero, $a2
+; LA32-NEXT: sll.w $a6, $a0, $a5
+; LA32-NEXT: andi $a7, $a5, 63
+; LA32-NEXT: addi.w $t0, $a7, -32
+; LA32-NEXT: srai.w $t1, $t0, 31
+; LA32-NEXT: and $a6, $t1, $a6
+; LA32-NEXT: addi.w $a4, $a4, -32
+; LA32-NEXT: slti $t1, $a4, 0
+; LA32-NEXT: maskeqz $a3, $a3, $t1
+; LA32-NEXT: srl.w $t2, $a1, $a4
+; LA32-NEXT: masknez $t1, $t2, $t1
+; LA32-NEXT: or $a3, $a3, $t1
+; LA32-NEXT: xori $a7, $a7, 31
+; LA32-NEXT: srli.w $t1, $a0, 1
+; LA32-NEXT: srl.w $a7, $t1, $a7
+; LA32-NEXT: or $a3, $a3, $a6
+; LA32-NEXT: sll.w $a5, $a1, $a5
+; LA32-NEXT: or $a5, $a5, $a7
+; LA32-NEXT: sll.w $a0, $a0, $t0
+; LA32-NEXT: slti $a6, $t0, 0
+; LA32-NEXT: masknez $a0, $a0, $a6
+; LA32-NEXT: maskeqz $a5, $a5, $a6
+; LA32-NEXT: or $a0, $a5, $a0
+; LA32-NEXT: srl.w $a1, $a1, $a2
+; LA32-NEXT: srai.w $a2, $a4, 31
+; LA32-NEXT: and $a1, $a2, $a1
+; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: move $a0, $a3
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_64_mask_and_127_and_63:
+; LA64: # %bb.0:
+; LA64-NEXT: rotr.d $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %a = and i64 %y, 127
+ %b = lshr i64 %x, %a
+ %c = sub i64 0, %y
+ %d = and i64 %c, 63
+ %e = shl i64 %x, %d
+ %f = or i64 %b, %e
+ ret i64 %f
+}
+
+define i64 @rotr_64_mask_or_128_or_64(i64 %x, i64 %y) nounwind {
+; LA32-LABEL: rotr_64_mask_or_128_or_64:
+; LA32: # %bb.0:
+; LA32-NEXT: srl.w $a3, $a1, $a2
+; LA32-NEXT: sub.w $a1, $zero, $a2
+; LA32-NEXT: sll.w $a1, $a0, $a1
+; LA32-NEXT: move $a0, $a3
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_64_mask_or_128_or_64:
+; LA64: # %bb.0:
+; LA64-NEXT: rotr.d $a0, $a0, $a1
+; LA64-NEXT: jirl $zero, $ra, 0
+ %a = or i64 %y, 128
+ %b = lshr i64 %x, %a
+ %c = sub i64 0, %y
+ %d = or i64 %c, 64
+ %e = shl i64 %x, %d
+ %f = or i64 %b, %e
+ ret i64 %f
+}
+
+define i32 @rotri_i32(i32 %a) nounwind {
+; LA32-LABEL: rotri_i32:
+; LA32: # %bb.0:
+; LA32-NEXT: rotri.w $a0, $a0, 16
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotri_i32:
+; LA64: # %bb.0:
+; LA64-NEXT: rotri.w $a0, $a0, 16
+; LA64-NEXT: jirl $zero, $ra, 0
+ %shl = shl i32 %a, 16
+ %shr = lshr i32 %a, 16
+ %or = or i32 %shl, %shr
+ ret i32 %or
+}
+
+define i64 @rotri_i64(i64 %a) nounwind {
+; LA32-LABEL: rotri_i64:
+; LA32: # %bb.0:
+; LA32-NEXT: move $a2, $a0
+; LA32-NEXT: move $a0, $a1
+; LA32-NEXT: move $a1, $a2
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotri_i64:
+; LA64: # %bb.0:
+; LA64-NEXT: rotri.d $a0, $a0, 32
+; LA64-NEXT: jirl $zero, $ra, 0
+ %shl = shl i64 %a, 32
+ %shr = lshr i64 %a, 32
+ %or = or i64 %shl, %shr
+ ret i64 %or
+}
+
+declare i32 @llvm.fshl.i32(i32, i32, i32)
+declare i64 @llvm.fshl.i64(i64, i64, i64)
+declare i32 @llvm.fshr.i32(i32, i32, i32)
+declare i64 @llvm.fshr.i64(i64, i64, i64)
+
+define signext i32 @rotl_i32_fshl(i32 signext %a) nounwind {
+; LA32-LABEL: rotl_i32_fshl:
+; LA32: # %bb.0:
+; LA32-NEXT: rotri.w $a0, $a0, 20
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_i32_fshl:
+; LA64: # %bb.0:
+; LA64-NEXT: rotri.w $a0, $a0, 20
+; LA64-NEXT: jirl $zero, $ra, 0
+ %or = tail call i32 @llvm.fshl.i32(i32 %a, i32 %a, i32 12)
+ ret i32 %or
+}
+
+define i64 @rotl_i64_fshl(i64 %a) nounwind {
+; LA32-LABEL: rotl_i64_fshl:
+; LA32: # %bb.0:
+; LA32-NEXT: srli.w $a2, $a1, 20
+; LA32-NEXT: slli.w $a3, $a0, 12
+; LA32-NEXT: or $a2, $a3, $a2
+; LA32-NEXT: srli.w $a0, $a0, 20
+; LA32-NEXT: slli.w $a1, $a1, 12
+; LA32-NEXT: or $a1, $a1, $a0
+; LA32-NEXT: move $a0, $a2
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotl_i64_fshl:
+; LA64: # %bb.0:
+; LA64-NEXT: rotri.d $a0, $a0, 52
+; LA64-NEXT: jirl $zero, $ra, 0
+ %or = tail call i64 @llvm.fshl.i64(i64 %a, i64 %a, i64 12)
+ ret i64 %or
+}
+
+define signext i32 @rotr_i32_fshr(i32 signext %a) nounwind {
+; LA32-LABEL: rotr_i32_fshr:
+; LA32: # %bb.0:
+; LA32-NEXT: rotri.w $a0, $a0, 12
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_i32_fshr:
+; LA64: # %bb.0:
+; LA64-NEXT: slli.d $a1, $a0, 20
+; LA64-NEXT: bstrpick.d $a0, $a0, 31, 12
+; LA64-NEXT: or $a0, $a0, $a1
+; LA64-NEXT: addi.w $a0, $a0, 0
+; LA64-NEXT: jirl $zero, $ra, 0
+ %or = tail call i32 @llvm.fshr.i32(i32 %a, i32 %a, i32 12)
+ ret i32 %or
+}
+
+define i64 @rotr_i64_fshr(i64 %a) nounwind {
+; LA32-LABEL: rotr_i64_fshr:
+; LA32: # %bb.0:
+; LA32-NEXT: srli.w $a2, $a0, 12
+; LA32-NEXT: slli.w $a3, $a1, 20
+; LA32-NEXT: or $a2, $a3, $a2
+; LA32-NEXT: srli.w $a1, $a1, 12
+; LA32-NEXT: slli.w $a0, $a0, 20
+; LA32-NEXT: or $a1, $a0, $a1
+; LA32-NEXT: move $a0, $a2
+; LA32-NEXT: jirl $zero, $ra, 0
+;
+; LA64-LABEL: rotr_i64_fshr:
+; LA64: # %bb.0:
+; LA64-NEXT: rotri.d $a0, $a0, 12
+; LA64-NEXT: jirl $zero, $ra, 0
+ %or = tail call i64 @llvm.fshr.i64(i64 %a, i64 %a, i64 12)
+ ret i64 %or
+}