[llvm] [AArch64][SVE2] Generate XAR (PR #77160)
via llvm-commits
llvm-commits at lists.llvm.org
Fri Jan 5 15:52:02 PST 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Usman Nadeem (UsmanNadeem)
<details>
<summary>Changes</summary>
XAR: bitwise exclusive OR and rotate right by immediate.
Add a new ISD node for XAR and lower the following rotate pattern
to XAR for appropriate types:
    rotr (xor(x, y), imm) -> xar (x, y, imm)
Change-Id: If1f649b1bf5365b575dc9fa3e6618e97dc19a066
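
For illustration, here is a minimal IR sketch of the shape this lowering targets (the function name is hypothetical; the expected output is taken from the sve2-xar.ll tests added below). A rotate left by 60 on 64-bit elements is the same as a rotate right by 4, so with `+sve2` the whole sequence should collapse into a single `xar z0.d, z0.d, z1.d, #4`:

```llvm
define <vscale x 2 x i64> @xar_example(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
  ; xor feeding a splat-constant rotate (fshl with both value operands equal)
  %xor = xor <vscale x 2 x i64> %x, %y
  %rot = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %xor, <vscale x 2 x i64> %xor, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 60, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
  ret <vscale x 2 x i64> %rot
}

declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
```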
---
Full diff: https://github.com/llvm/llvm-project/pull/77160.diff
5 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.cpp (+32)
- (modified) llvm/lib/Target/AArch64/AArch64ISelLowering.h (+4)
- (modified) llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td (+10-1)
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+1)
- (added) llvm/test/CodeGen/AArch64/sve2-xar.ll (+213)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 102fd0c3dae2ab..cd51ce01caee9f 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -1648,6 +1648,11 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
setOperationAction(ISD::FLDEXP, MVT::f16, Custom);
}
+ if (Subtarget->hasSVE2orSME()) {
+ for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64})
+ setOperationAction(ISD::ROTL, VT, Custom);
+ }
+
PredictableSelectIsExpensive = Subtarget->predictableSelectIsExpensive();
IsStrictFPEnabled = true;
@@ -2645,6 +2650,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
MAKE_CASE(AArch64ISD::MSRR)
MAKE_CASE(AArch64ISD::RSHRNB_I)
MAKE_CASE(AArch64ISD::CTTZ_ELTS)
+ MAKE_CASE(AArch64ISD::XAR_I)
}
#undef MAKE_CASE
return nullptr;
@@ -3741,6 +3747,30 @@ getAArch64XALUOOp(AArch64CC::CondCode &CC, SDValue Op, SelectionDAG &DAG) {
return std::make_pair(Value, Overflow);
}
+SDValue AArch64TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
+ EVT VT = Op.getValueType();
+ assert(VT.isScalableVector() && "Expected a scalable vector.");
+ assert(Subtarget->hasSVE2orSME() && "Custom lowering only for SVE2.");
+
+ // rotr (xor(x, y), imm) -> xar (x, y, imm)
+ SDValue Xor = Op.getOperand(0);
+ SDValue RotlValue = Op.getOperand(1);
+
+ if (Xor.getOpcode() != ISD::XOR || RotlValue.getOpcode() != ISD::SPLAT_VECTOR)
+ return SDValue();
+ if (!isa<ConstantSDNode>(RotlValue.getOperand(0).getNode()))
+ return SDValue();
+
+ uint64_t RotrAmt =
+ (VT.getScalarSizeInBits() - RotlValue->getConstantOperandVal(0)) %
+ VT.getScalarSizeInBits();
+
+ SDLoc DL(Op);
+ SDValue Ops[] = {Xor.getOperand(0), Xor.getOperand(1),
+ DAG.getTargetConstant(RotrAmt, DL, MVT::i32)};
+ return DAG.getNode(AArch64ISD::XAR_I, DL, VT, Ops);
+}
+
SDValue AArch64TargetLowering::LowerXOR(SDValue Op, SelectionDAG &DAG) const {
if (useSVEForFixedLengthVectorVT(Op.getValueType(),
!Subtarget->isNeonAvailable()))
@@ -6414,6 +6444,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
return LowerFunnelShift(Op, DAG);
case ISD::FLDEXP:
return LowerFLDEXP(Op, DAG);
+ case ISD::ROTL:
+ return LowerROTL(Op, DAG);
}
}
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
index 6ddbcd41dcb769..0d9ebad4ada905 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h
@@ -215,6 +215,9 @@ enum NodeType : unsigned {
// Vector narrowing shift by immediate (bottom)
RSHRNB_I,
+ // Vector bitwise xor and rotate right by immediate
+ XAR_I,
+
// Vector shift by constant and insert
VSLI,
VSRI,
@@ -1143,6 +1146,7 @@ class AArch64TargetLowering : public TargetLowering {
SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerVectorOR(SDValue Op, SelectionDAG &DAG) const;
+ SDValue LowerROTL(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerXOR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const;
diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
index 344a153890631e..6e018afe18bd40 100644
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@@ -454,6 +454,15 @@ def AArch64eor3 : PatFrags<(ops node:$op1, node:$op2, node:$op3),
[(int_aarch64_sve_eor3 node:$op1, node:$op2, node:$op3),
(xor node:$op1, (xor node:$op2, node:$op3))]>;
+def SDT_AArch64xar_Imm : SDTypeProfile<1, 3, [
+ SDTCisVec<0>, SDTCisVec<1>, SDTCisVec<2>, SDTCisInt<3>,
+ SDTCisSameAs<0,1>, SDTCisSameAs<1,2>]>;
+def AArch64xar_node : SDNode<"AArch64ISD::XAR_I", SDT_AArch64xar_Imm>;
+def AArch64xar : PatFrags<(ops node:$op1, node:$op2, node:$op3),
+ [(int_aarch64_sve_xar node:$op1, node:$op2, node:$op3),
+ (AArch64xar_node node:$op1, node:$op2, node:$op3)]>;
+
+
def AArch64fmla_m1 : PatFrags<(ops node:$pg, node:$za, node:$zn, node:$zm),
[(int_aarch64_sve_fmla node:$pg, node:$za, node:$zn, node:$zm),
(vselect node:$pg, (AArch64fadd_p_contract (SVEAllActive), node:$za, (AArch64fmul_p_oneuse (SVEAllActive), node:$zn, node:$zm)), node:$za),
@@ -3721,7 +3730,7 @@ let Predicates = [HasSVE2orSME] in {
defm NBSL_ZZZZ : sve2_int_bitwise_ternary_op<0b111, "nbsl", int_aarch64_sve_nbsl>;
// SVE2 bitwise xor and rotate right by immediate
- defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", int_aarch64_sve_xar>;
+ defm XAR_ZZZI : sve2_int_rotate_right_imm<"xar", AArch64xar>;
// SVE2 extract vector (immediate offset, constructive)
def EXT_ZZI_B : sve2_int_perm_extract_i_cons<"ext">;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index b17e215e200dea..a131cf8a6f5402 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -394,6 +394,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
void mirFileLoaded(MachineFunction &MF) const override;
bool hasSVEorSME() const { return hasSVE() || hasSME(); }
+ bool hasSVE2orSME() const { return hasSVE2() || hasSME(); }
// Return the known range for the bit length of SVE data registers. A value
// of 0 means nothing is known about that particular limit beyong what's
diff --git a/llvm/test/CodeGen/AArch64/sve2-xar.ll b/llvm/test/CodeGen/AArch64/sve2-xar.ll
new file mode 100644
index 00000000000000..4b032f74b3a244
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/sve2-xar.ll
@@ -0,0 +1,213 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=aarch64 -mattr=+sve < %s -o - | FileCheck --check-prefix=SVE %s
+; RUN: llc -mtriple=aarch64 -mattr=+sve2 < %s -o - | FileCheck --check-prefix=SVE2 %s
+
+define <vscale x 2 x i64> @xar_nxv2i64_l(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; SVE-LABEL: xar_nxv2i64_l:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.d, z0.d, #4
+; SVE-NEXT: lsl z0.d, z0.d, #60
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv2i64_l:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.d, z0.d, z1.d, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 2 x i64> %x, %y
+ %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 60, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+ ret <vscale x 2 x i64> %b
+}
+
+define <vscale x 2 x i64> @xar_nxv2i64_r(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; SVE-LABEL: xar_nxv2i64_r:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsl z1.d, z0.d, #60
+; SVE-NEXT: lsr z0.d, z0.d, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv2i64_r:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.d, z0.d, z1.d, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 2 x i64> %x, %y
+ %b = call <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 4, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+ ret <vscale x 2 x i64> %b
+}
+
+
+define <vscale x 4 x i32> @xar_nxv4i32_l(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; SVE-LABEL: xar_nxv4i32_l:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.s, z0.s, #4
+; SVE-NEXT: lsl z0.s, z0.s, #28
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv4i32_l:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.s, z0.s, z1.s, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 4 x i32> %x, %y
+ %b = call <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 28, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 4 x i32> @xar_nxv4i32_r(<vscale x 4 x i32> %x, <vscale x 4 x i32> %y) {
+; SVE-LABEL: xar_nxv4i32_r:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsl z1.s, z0.s, #28
+; SVE-NEXT: lsr z0.s, z0.s, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv4i32_r:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.s, z0.s, z1.s, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 4 x i32> %x, %y
+ %b = call <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %a, <vscale x 4 x i32> shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 4, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer))
+ ret <vscale x 4 x i32> %b
+}
+
+define <vscale x 8 x i16> @xar_nxv8i16_l(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; SVE-LABEL: xar_nxv8i16_l:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.h, z0.h, #4
+; SVE-NEXT: lsl z0.h, z0.h, #12
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv8i16_l:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.h, z0.h, z1.h, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 8 x i16> %x, %y
+ %b = call <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 12, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+ ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 8 x i16> @xar_nxv8i16_r(<vscale x 8 x i16> %x, <vscale x 8 x i16> %y) {
+; SVE-LABEL: xar_nxv8i16_r:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsl z1.h, z0.h, #12
+; SVE-NEXT: lsr z0.h, z0.h, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv8i16_r:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.h, z0.h, z1.h, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 8 x i16> %x, %y
+ %b = call <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %a, <vscale x 8 x i16> shufflevector (<vscale x 8 x i16> insertelement (<vscale x 8 x i16> poison, i16 4, i32 0), <vscale x 8 x i16> poison, <vscale x 8 x i32> zeroinitializer))
+ ret <vscale x 8 x i16> %b
+}
+
+define <vscale x 16 x i8> @xar_nxv16i8_l(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; SVE-LABEL: xar_nxv16i8_l:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.b, z0.b, #4
+; SVE-NEXT: lsl z0.b, z0.b, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv16i8_l:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.b, z0.b, z1.b, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 16 x i8> %x, %y
+ %b = call <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 4, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer))
+ ret <vscale x 16 x i8> %b
+}
+
+define <vscale x 16 x i8> @xar_nxv16i8_r(<vscale x 16 x i8> %x, <vscale x 16 x i8> %y) {
+; SVE-LABEL: xar_nxv16i8_r:
+; SVE: // %bb.0:
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: lsl z1.b, z0.b, #4
+; SVE-NEXT: lsr z0.b, z0.b, #4
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv16i8_r:
+; SVE2: // %bb.0:
+; SVE2-NEXT: xar z0.b, z0.b, z1.b, #4
+; SVE2-NEXT: ret
+ %a = xor <vscale x 16 x i8> %x, %y
+ %b = call <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %a, <vscale x 16 x i8> shufflevector (<vscale x 16 x i8> insertelement (<vscale x 16 x i8> poison, i8 4, i32 0), <vscale x 16 x i8> poison, <vscale x 16 x i32> zeroinitializer))
+ ret <vscale x 16 x i8> %b
+}
+
+
+define <vscale x 2 x i64> @xar_nxv2i64_l_neg1(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y, <vscale x 2 x i64> %z) {
+; SVE-LABEL: xar_nxv2i64_l_neg1:
+; SVE: // %bb.0:
+; SVE-NEXT: mov z3.d, z2.d
+; SVE-NEXT: ptrue p0.d
+; SVE-NEXT: subr z2.d, z2.d, #0 // =0x0
+; SVE-NEXT: eor z0.d, z0.d, z1.d
+; SVE-NEXT: and z2.d, z2.d, #0x3f
+; SVE-NEXT: and z3.d, z3.d, #0x3f
+; SVE-NEXT: movprfx z1, z0
+; SVE-NEXT: lsl z1.d, p0/m, z1.d, z3.d
+; SVE-NEXT: lsr z0.d, p0/m, z0.d, z2.d
+; SVE-NEXT: orr z0.d, z1.d, z0.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv2i64_l_neg1:
+; SVE2: // %bb.0:
+; SVE2-NEXT: mov z3.d, z2.d
+; SVE2-NEXT: ptrue p0.d
+; SVE2-NEXT: subr z2.d, z2.d, #0 // =0x0
+; SVE2-NEXT: eor z0.d, z0.d, z1.d
+; SVE2-NEXT: and z2.d, z2.d, #0x3f
+; SVE2-NEXT: and z3.d, z3.d, #0x3f
+; SVE2-NEXT: movprfx z1, z0
+; SVE2-NEXT: lsl z1.d, p0/m, z1.d, z3.d
+; SVE2-NEXT: lsr z0.d, p0/m, z0.d, z2.d
+; SVE2-NEXT: orr z0.d, z1.d, z0.d
+; SVE2-NEXT: ret
+ %a = xor <vscale x 2 x i64> %x, %y
+ %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> %z)
+ ret <vscale x 2 x i64> %b
+}
+
+; TODO: We could use usra instruction here.
+define <vscale x 2 x i64> @xar_nxv2i64_l_neg2(<vscale x 2 x i64> %x, <vscale x 2 x i64> %y) {
+; SVE-LABEL: xar_nxv2i64_l_neg2:
+; SVE: // %bb.0:
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: lsr z1.d, z0.d, #4
+; SVE-NEXT: lsl z0.d, z0.d, #60
+; SVE-NEXT: orr z0.d, z0.d, z1.d
+; SVE-NEXT: ret
+;
+; SVE2-LABEL: xar_nxv2i64_l_neg2:
+; SVE2: // %bb.0:
+; SVE2-NEXT: orr z0.d, z0.d, z1.d
+; SVE2-NEXT: lsr z1.d, z0.d, #4
+; SVE2-NEXT: lsl z0.d, z0.d, #60
+; SVE2-NEXT: orr z0.d, z0.d, z1.d
+; SVE2-NEXT: ret
+ %a = or <vscale x 2 x i64> %x, %y
+ %b = call <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %a, <vscale x 2 x i64> shufflevector (<vscale x 2 x i64> insertelement (<vscale x 2 x i64> poison, i64 60, i32 0), <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer))
+ ret <vscale x 2 x i64> %b
+}
+
+declare <vscale x 2 x i64> @llvm.fshl.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.fshl.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.fshl.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.fshl.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
+declare <vscale x 2 x i64> @llvm.fshr.nxv2i64(<vscale x 2 x i64>, <vscale x 2 x i64>, <vscale x 2 x i64>)
+declare <vscale x 4 x i32> @llvm.fshr.nxv4i32(<vscale x 4 x i32>, <vscale x 4 x i32>, <vscale x 4 x i32>)
+declare <vscale x 8 x i16> @llvm.fshr.nxv8i16(<vscale x 8 x i16>, <vscale x 8 x i16>, <vscale x 8 x i16>)
+declare <vscale x 16 x i8> @llvm.fshr.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>, <vscale x 16 x i8>)
``````````
</details>
https://github.com/llvm/llvm-project/pull/77160