[llvm] 69cc5a4 - [LegalizeTypes] Support promotion for vp bitmanip sdnodes.
Yeting Kuo via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 10 17:27:49 PDT 2023
Author: Yeting Kuo
Date: 2023-08-11T08:27:42+08:00
New Revision: 69cc5a4e1a2a3d8e6a5086770fc7539578bdeec7
URL: https://github.com/llvm/llvm-project/commit/69cc5a4e1a2a3d8e6a5086770fc7539578bdeec7
DIFF: https://github.com/llvm/llvm-project/commit/69cc5a4e1a2a3d8e6a5086770fc7539578bdeec7.diff
LOG: [LegalizeTypes] Support promotion for vp bitmanip sdnodes.
This supports promotion for vp.bitreverse/bswap/ctlz/ctlz_zero_undef/cttz/cttz_zero_undef/ctpop/fshr/fshl.
Reviewed By: craig.topper, luke
Differential Revision: https://reviews.llvm.org/D157607
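For background: VP ("vector-predicated") SDNodes carry a mask and an explicit vector length (EVL) as trailing operands — read in this patch as N->getOperand(1)/(2) for the unary ops and N->getOperand(3)/(4) for the funnel shifts — so every fixup node the legalizer builds must thread the same (Mask, EVL) pair through. A minimal scalar sketch of that semantics (illustrative only; vp_lshr and its pass-through behavior for inactive lanes are our own modeling choices, not LLVM API — LLVM itself leaves such lanes poison):

#include <cstddef>
#include <cstdint>
#include <vector>

// Scalar model of a VP shift: only lanes below EVL with a true mask bit
// are operated on; all other lanes are left untouched in this model.
std::vector<uint64_t> vp_lshr(std::vector<uint64_t> v, uint64_t shamt,
                              const std::vector<bool> &mask, size_t evl) {
  for (size_t i = 0; i < evl && i < v.size(); ++i)
    if (mask[i])
      v[i] >>= shamt;
  return v;
}

int main() {
  std::vector<uint64_t> v{256, 256, 256, 256};
  auto r = vp_lshr(v, 4, {true, false, true, true}, 3);
  // r == {16, 256, 16, 256}: lane 1 is masked off, lane 3 is beyond EVL.
  return (r[0] == 16 && r[1] == 256 && r[2] == 16 && r[3] == 256) ? 0 : 1;
}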
Added:
Modified:
llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
Removed:
################################################################################
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index b9268cee11d944..b11aaa90388730 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -60,14 +60,21 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
case ISD::AssertSext: Res = PromoteIntRes_AssertSext(N); break;
case ISD::AssertZext: Res = PromoteIntRes_AssertZext(N); break;
case ISD::BITCAST: Res = PromoteIntRes_BITCAST(N); break;
+ case ISD::VP_BITREVERSE:
case ISD::BITREVERSE: Res = PromoteIntRes_BITREVERSE(N); break;
+ case ISD::VP_BSWAP:
case ISD::BSWAP: Res = PromoteIntRes_BSWAP(N); break;
case ISD::BUILD_PAIR: Res = PromoteIntRes_BUILD_PAIR(N); break;
case ISD::Constant: Res = PromoteIntRes_Constant(N); break;
+ case ISD::VP_CTLZ_ZERO_UNDEF:
+ case ISD::VP_CTLZ:
case ISD::CTLZ_ZERO_UNDEF:
case ISD::CTLZ: Res = PromoteIntRes_CTLZ(N); break;
case ISD::PARITY:
+ case ISD::VP_CTPOP:
case ISD::CTPOP: Res = PromoteIntRes_CTPOP_PARITY(N); break;
+ case ISD::VP_CTTZ_ZERO_UNDEF:
+ case ISD::VP_CTTZ:
case ISD::CTTZ_ZERO_UNDEF:
case ISD::CTTZ: Res = PromoteIntRes_CTTZ(N); break;
case ISD::EXTRACT_VECTOR_ELT:
@@ -283,6 +290,11 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
Res = PromoteIntRes_FunnelShift(N);
break;
+ case ISD::VP_FSHL:
+ case ISD::VP_FSHR:
+ Res = PromoteIntRes_VPFunnelShift(N);
+ break;
+
case ISD::IS_FPCLASS:
Res = PromoteIntRes_IS_FPCLASS(N);
break;
@@ -516,8 +528,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
- DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+ SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl);
+ if (N->getOpcode() == ISD::BSWAP)
+ return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op),
+ ShAmt);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+ DAG.getNode(ISD::VP_BSWAP, dl, NVT, Op, Mask, EVL), ShAmt,
+ Mask, EVL);
}
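The fixup mirrors the scalar identity already used on the non-VP path: byte-swap in the wider type, then shift right by DiffBits. A self-contained check of that identity for the i48-in-i64 case exercised by the new vp_bswap_nxv1i48 test below (our own sketch, using the GCC/Clang __builtin_bswap64 builtin; DiffBits = 64 - 48 = 16 matches the final vsrl.vi v8, v8, 16 in the test):

#include <cassert>
#include <cstdint>

// bswap on the low 48 bits (the original-type operation).
uint64_t bswap48(uint64_t x) {
  uint64_t r = 0;
  for (int i = 0; i < 6; ++i)
    r |= ((x >> (8 * i)) & 0xff) << (8 * (5 - i));
  return r;
}

// Promotion: zero-extend to i64, bswap there, shift right by DiffBits.
uint64_t bswap48_promoted(uint64_t x) {
  return __builtin_bswap64(x & 0xffffffffffffULL) >> 16;
}

int main() {
  for (uint64_t x : {0x0123456789abULL, 0ULL, 0xffffffffffffULL})
    assert(bswap48(x) == bswap48_promoted(x));
}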
SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
@@ -537,9 +556,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITREVERSE(SDNode *N) {
}
unsigned DiffBits = NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits();
- return DAG.getNode(ISD::SRL, dl, NVT,
- DAG.getNode(ISD::BITREVERSE, dl, NVT, Op),
- DAG.getShiftAmountConstant(DiffBits, NVT, dl));
+ SDValue ShAmt = DAG.getShiftAmountConstant(DiffBits, NVT, dl);
+ if (N->getOpcode() == ISD::BITREVERSE)
+ return DAG.getNode(ISD::SRL, dl, NVT,
+ DAG.getNode(ISD::BITREVERSE, dl, NVT, Op), ShAmt);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_LSHR, dl, NVT,
+ DAG.getNode(ISD::VP_BITREVERSE, dl, NVT, Op, Mask, EVL),
+ ShAmt, Mask, EVL);
}
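Same shape as the BSWAP case: reverse all NVT bits, then shift the result down by DiffBits. For the i9-in-i16 case from the new vp_bitreverse_nxv1i9 test, DiffBits = 16 - 9 = 7, matching the trailing vsrl.vi v8, v8, 7 in the test. A brute-force scalar check (our own sketch, not the patch's code):

#include <cassert>
#include <cstdint>

// Reverse the low 9 bits (original-type operation).
uint16_t brev9(uint16_t x) {
  uint16_t r = 0;
  for (int i = 0; i < 9; ++i)
    r |= ((x >> i) & 1) << (8 - i);
  return r;
}

// Reverse all 16 bits (promoted-type operation).
uint16_t brev16(uint16_t x) {
  uint16_t r = 0;
  for (int i = 0; i < 16; ++i)
    r |= ((x >> i) & 1) << (15 - i);
  return r;
}

int main() {
  for (uint16_t x = 0; x < 512; ++x) // all zero-extended i9 values
    assert(brev9(x) == (uint16_t)(brev16(x) >> 7));
}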
SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) {
@@ -584,12 +609,19 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
// Zero extend to the promoted type and do the count there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- Op = DAG.getNode(N->getOpcode(), dl, NVT, Op);
+
// Subtract off the extra leading bits in the bigger type.
- return DAG.getNode(
- ISD::SUB, dl, NVT, Op,
- DAG.getConstant(NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl,
- NVT));
+ SDValue ExtractLeadingBits = DAG.getConstant(
+ NVT.getScalarSizeInBits() - OVT.getScalarSizeInBits(), dl, NVT);
+ if (!N->isVPOpcode())
+ return DAG.getNode(ISD::SUB, dl, NVT,
+ DAG.getNode(N->getOpcode(), dl, NVT, Op),
+ ExtractLeadingBits);
+ SDValue Mask = N->getOperand(1);
+ SDValue EVL = N->getOperand(2);
+ return DAG.getNode(ISD::VP_SUB, dl, NVT,
+ DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
+ ExtractLeadingBits, Mask, EVL);
}
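The correction is the same as on the existing scalar path, now built with VP_SUB when the node is a VP opcode: counting in the wider type sees NVT-OVT extra leading zeros, which must be subtracted back out. For i9 promoted to i16 that constant is 7 — the vsub.vx v8, v8, a0 with a0 = 7 in the new ctlz tests. A scalar sketch of the identity (our own example):

#include <cassert>
#include <cstdint>

// ctlz on i9, defined as 9 for x == 0 (the non-zero-undef form).
int ctlz9(uint16_t x) {
  int n = 0;
  for (int b = 8; b >= 0 && !((x >> b) & 1); --b)
    ++n;
  return n;
}

// Promotion: zero-extend to i16, count there, then subtract the
// ExtractLeadingBits constant (16 - 9 = 7) added by the wider type.
int ctlz9_promoted(uint16_t x) {
  int n = 0;
  for (int b = 15; b >= 0 && !((x >> b) & 1); --b)
    ++n;
  return n - 7;
}

int main() {
  for (uint16_t x = 0; x < 512; ++x)
    assert(ctlz9(x) == ctlz9_promoted(x));
}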
SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
@@ -611,7 +643,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP_PARITY(SDNode *N) {
// Zero extend to the promoted type and do the count or parity there.
SDValue Op = ZExtPromotedInteger(N->getOperand(0));
- return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+ if (!N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op);
+ return DAG.getNode(N->getOpcode(), SDLoc(N), Op.getValueType(), Op,
+ N->getOperand(1), N->getOperand(2));
}
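CTPOP and PARITY need no arithmetic fixup at all, since zero extension introduces only zero bits; the VP form merely forwards the mask and EVL operands. A one-loop scalar check (our own example, using the GCC/Clang __builtin_popcount builtin):

#include <cassert>
#include <cstdint>

int main() {
  for (uint16_t x = 0; x < 512; ++x) {
    int narrow = 0;
    for (int b = 0; b < 9; ++b) // ctpop over the 9 original bits
      narrow += (x >> b) & 1;
    // __builtin_popcount counts all promoted bits; the zext'ed high bits
    // are zero, so the two counts agree with no correction.
    assert(narrow == __builtin_popcount(x));
  }
}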
SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
@@ -635,15 +670,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) {
}
}
- if (N->getOpcode() == ISD::CTTZ) {
+ if (N->getOpcode() == ISD::CTTZ || N->getOpcode() == ISD::VP_CTTZ) {
// The count is the same in the promoted type except if the original
// value was zero. This can be handled by setting the bit just off
// the top of the original type.
auto TopBit = APInt::getOneBitSet(NVT.getScalarSizeInBits(),
OVT.getScalarSizeInBits());
- Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ if (N->getOpcode() == ISD::CTTZ)
+ Op = DAG.getNode(ISD::OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT));
+ else
+ Op =
+ DAG.getNode(ISD::VP_OR, dl, NVT, Op, DAG.getConstant(TopBit, dl, NVT),
+ N->getOperand(1), N->getOperand(2));
}
- return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ if (!N->isVPOpcode())
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op);
+ return DAG.getNode(N->getOpcode(), dl, NVT, Op, N->getOperand(1),
+ N->getOperand(2));
}
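For plain CTTZ (and now VP_CTTZ) the zero-input case is handled by OR-ing in the bit just above the original width before counting: a nonzero input is unchanged, while a zero input then yields exactly OVT trailing zeros. For i9 that bit is 1 << 9 = 512, which is the vor.vx v8, v8, a1 (a1 = 512) in the new cttz tests; the zero-undef forms skip the OR. A scalar sketch (our own example):

#include <cassert>
#include <cstdint>

// cttz on i9, defined as 9 for x == 0 (the non-zero-undef form).
int cttz9(uint16_t x) {
  int n = 0;
  for (int b = 0; b <= 8 && !((x >> b) & 1); ++b)
    ++n;
  return n;
}

// Promotion: set the TopBit (1 << 9) so a zero i9 input has exactly
// nine trailing zeros in i16, then count in the wider type.
int cttz9_promoted(uint16_t x) {
  uint16_t y = x | 0x200;
  int n = 0;
  while (!((y >> n) & 1))
    ++n;
  return n;
}

int main() {
  for (uint16_t x = 0; x < 512; ++x)
    assert(cttz9(x) == cttz9_promoted(x));
}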
SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) {
@@ -1366,6 +1409,60 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FunnelShift(SDNode *N) {
return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt);
}
+// A vp version of PromoteIntRes_FunnelShift.
+SDValue DAGTypeLegalizer::PromoteIntRes_VPFunnelShift(SDNode *N) {
+ SDValue Hi = GetPromotedInteger(N->getOperand(0));
+ SDValue Lo = GetPromotedInteger(N->getOperand(1));
+ SDValue Amt = N->getOperand(2);
+ SDValue Mask = N->getOperand(3);
+ SDValue EVL = N->getOperand(4);
+ if (getTypeAction(Amt.getValueType()) == TargetLowering::TypePromoteInteger)
+ Amt = ZExtPromotedInteger(Amt);
+ EVT AmtVT = Amt.getValueType();
+
+ SDLoc DL(N);
+ EVT OldVT = N->getOperand(0).getValueType();
+ EVT VT = Lo.getValueType();
+ unsigned Opcode = N->getOpcode();
+ bool IsFSHR = Opcode == ISD::VP_FSHR;
+ unsigned OldBits = OldVT.getScalarSizeInBits();
+ unsigned NewBits = VT.getScalarSizeInBits();
+
+ // Amount has to be interpreted modulo the old bit width.
+ Amt = DAG.getNode(ISD::VP_UREM, DL, AmtVT, Amt,
+ DAG.getConstant(OldBits, DL, AmtVT), Mask, EVL);
+
+ // If the promoted type is twice the size (or more), then we use the
+ // traditional funnel 'double' shift codegen. This isn't necessary if the
+ // shift amount is constant.
+ // fshl(x,y,z) -> (((aext(x) << bw) | zext(y)) << (z % bw)) >> bw.
+ // fshr(x,y,z) -> (((aext(x) << bw) | zext(y)) >> (z % bw)).
+ if (NewBits >= (2 * OldBits) && !isa<ConstantSDNode>(Amt) &&
+ !TLI.isOperationLegalOrCustom(Opcode, VT)) {
+ SDValue HiShift = DAG.getConstant(OldBits, DL, VT);
+ Hi = DAG.getNode(ISD::VP_SHL, DL, VT, Hi, HiShift, Mask, EVL);
+ // FIXME: Replace this with VP operations.
+ Lo = DAG.getZeroExtendInReg(Lo, DL, OldVT);
+ SDValue Res = DAG.getNode(ISD::VP_OR, DL, VT, Hi, Lo, Mask, EVL);
+ Res = DAG.getNode(IsFSHR ? ISD::VP_LSHR : ISD::VP_SHL, DL, VT, Res, Amt,
+ Mask, EVL);
+ if (!IsFSHR)
+ Res = DAG.getNode(ISD::VP_LSHR, DL, VT, Res, HiShift, Mask, EVL);
+ return Res;
+ }
+
+ // Shift Lo up to occupy the upper bits of the promoted type.
+ SDValue ShiftOffset = DAG.getConstant(NewBits - OldBits, DL, AmtVT);
+ Lo = DAG.getNode(ISD::VP_SHL, DL, VT, Lo, ShiftOffset, Mask, EVL);
+
+ // Increase Amount to shift the result into the lower bits of the promoted
+ // type.
+ if (IsFSHR)
+ Amt = DAG.getNode(ISD::VP_ADD, DL, AmtVT, Amt, ShiftOffset, Mask, EVL);
+
+ return DAG.getNode(Opcode, DL, VT, Hi, Lo, Amt, Mask, EVL);
+}
+
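For i9 promoted to i16, NewBits (16) is below 2 * OldBits (18), so the first path is skipped and the second one applies: Lo is shifted into the top of the wider element by ShiftOffset = 16 - 9 = 7, and for VP_FSHR the (urem'd) amount is biased by the same 7 — visible in the new fshr_v1i9 test as vsll.vi v9, v9, 7 followed by vremu.vx with 9 and vadd.vi with 7. VP_FSHL needs no bias because shifting Lo up already aligns its top bit. A scalar check of the FSHR identity (our own sketch, not the patch's code):

#include <cassert>
#include <cstdint>

// fshr on i9: shift the 18-bit concatenation a:b right by z % 9 and
// keep the low 9 bits.
uint16_t fshr9(uint16_t a, uint16_t b, uint16_t z) {
  uint32_t cat = ((uint32_t)a << 9) | b;
  return (uint16_t)((cat >> (z % 9)) & 0x1ff);
}

// fshr on i16, standing in for the promoted-type funnel-shift node.
uint16_t fshr16(uint16_t a, uint16_t b, unsigned s) {
  uint32_t cat = ((uint32_t)a << 16) | b;
  return (uint16_t)(cat >> (s % 16));
}

// Promotion: Lo <<= ShiftOffset (7); Amt = (z urem 9) + 7; one i16 fshr.
uint16_t fshr9_promoted(uint16_t a, uint16_t b, uint16_t z) {
  uint16_t lo = (uint16_t)(b << 7);
  unsigned amt = (unsigned)(z % 9) + 7;
  return fshr16(a, lo, amt) & 0x1ff;
}

int main() {
  for (uint16_t z = 0; z < 32; ++z)
    for (uint16_t a : {0u, 1u, 0x155u, 0x1ffu})
      for (uint16_t b : {0u, 1u, 0xaau, 0x1ffu})
        assert(fshr9(a, b, z) == fshr9_promoted(a, b, z));
}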
SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) {
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
SDValue Res;
diff --git a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index db8f61eee6062c..3e78c5eba67737 100644
--- a/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -362,6 +362,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
SDValue PromoteIntRes_ABS(SDNode *N);
SDValue PromoteIntRes_Rotate(SDNode *N);
SDValue PromoteIntRes_FunnelShift(SDNode *N);
+ SDValue PromoteIntRes_VPFunnelShift(SDNode *N);
SDValue PromoteIntRes_IS_FPCLASS(SDNode *N);
// Integer Operand Promotion.
diff --git a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
index 2a0bea04648f8b..05361f48cd511d 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bitreverse-vp.ll
@@ -4203,3 +4203,76 @@ define <vscale x 64 x i16> @vp_bitreverse_nxv64i16_unmasked(<vscale x 64 x i16>
%v = call <vscale x 64 x i16> @llvm.vp.bitreverse.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
ret <vscale x 64 x i16> %v
}
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @vp_bitreverse_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_bitreverse_nxv1i9:
+; RV32: # %bb.0:
+; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV32-NEXT: vsrl.vi v9, v8, 8, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
+; RV32-NEXT: vor.vv v8, v8, v9, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: addi a0, a0, -241
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 4, v0.t
+; RV32-NEXT: vor.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 2, v0.t
+; RV32-NEXT: lui a0, 3
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 2, v0.t
+; RV32-NEXT: vor.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: lui a0, 5
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 1, v0.t
+; RV32-NEXT: vor.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 7, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_bitreverse_nxv1i9:
+; RV64: # %bb.0:
+; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV64-NEXT: vsrl.vi v9, v8, 8, v0.t
+; RV64-NEXT: vsll.vi v8, v8, 8, v0.t
+; RV64-NEXT: vor.vv v8, v8, v9, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV64-NEXT: lui a0, 1
+; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsll.vi v8, v8, 4, v0.t
+; RV64-NEXT: vor.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 2, v0.t
+; RV64-NEXT: lui a0, 3
+; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsll.vi v8, v8, 2, v0.t
+; RV64-NEXT: vor.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT: lui a0, 5
+; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsll.vi v8, v8, 1, v0.t
+; RV64-NEXT: vor.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 7, v0.t
+; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_bitreverse_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vbrev.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsrl.vi v8, v8, 7, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 1 x i9> @llvm.vp.bitreverse.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
index 365869cc2ab435..244236fe77749c 100644
--- a/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/bswap-vp.ll
@@ -1828,3 +1828,92 @@ define <vscale x 64 x i16> @vp_bswap_nxv64i16_unmasked(<vscale x 64 x i16> %va,
%v = call <vscale x 64 x i16> @llvm.vp.bswap.nxv64i16(<vscale x 64 x i16> %va, <vscale x 64 x i1> %m, i32 %evl)
ret <vscale x 64 x i16> %v
}
+
+; Test promotion.
+declare <vscale x 1 x i48> @llvm.vp.bswap.nxv1i48(<vscale x 1 x i48>, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i48> @vp_bswap_nxv1i48(<vscale x 1 x i48> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_bswap_nxv1i48:
+; RV32: # %bb.0:
+; RV32-NEXT: addi sp, sp, -16
+; RV32-NEXT: .cfi_def_cfa_offset 16
+; RV32-NEXT: sw zero, 12(sp)
+; RV32-NEXT: lui a1, 1044480
+; RV32-NEXT: sw a1, 8(sp)
+; RV32-NEXT: li a1, 56
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vsrl.vx v9, v8, a1, v0.t
+; RV32-NEXT: li a2, 40
+; RV32-NEXT: vsrl.vx v10, v8, a2, v0.t
+; RV32-NEXT: lui a3, 16
+; RV32-NEXT: addi a3, a3, -256
+; RV32-NEXT: vand.vx v10, v10, a3, v0.t
+; RV32-NEXT: vor.vv v9, v10, v9, v0.t
+; RV32-NEXT: vsrl.vi v10, v8, 24, v0.t
+; RV32-NEXT: lui a4, 4080
+; RV32-NEXT: vand.vx v10, v10, a4, v0.t
+; RV32-NEXT: vsrl.vi v11, v8, 8, v0.t
+; RV32-NEXT: addi a5, sp, 8
+; RV32-NEXT: vsetvli a6, zero, e64, m1, ta, ma
+; RV32-NEXT: vlse64.v v12, (a5), zero
+; RV32-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV32-NEXT: vand.vv v11, v11, v12, v0.t
+; RV32-NEXT: vor.vv v10, v11, v10, v0.t
+; RV32-NEXT: vor.vv v9, v10, v9, v0.t
+; RV32-NEXT: vsll.vx v10, v8, a1, v0.t
+; RV32-NEXT: vand.vx v11, v8, a3, v0.t
+; RV32-NEXT: vsll.vx v11, v11, a2, v0.t
+; RV32-NEXT: vor.vv v10, v10, v11, v0.t
+; RV32-NEXT: vand.vx v11, v8, a4, v0.t
+; RV32-NEXT: vsll.vi v11, v11, 24, v0.t
+; RV32-NEXT: vand.vv v8, v8, v12, v0.t
+; RV32-NEXT: vsll.vi v8, v8, 8, v0.t
+; RV32-NEXT: vor.vv v8, v11, v8, v0.t
+; RV32-NEXT: vor.vv v8, v10, v8, v0.t
+; RV32-NEXT: vor.vv v8, v8, v9, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 16, v0.t
+; RV32-NEXT: addi sp, sp, 16
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_bswap_nxv1i48:
+; RV64: # %bb.0:
+; RV64-NEXT: lui a1, 4080
+; RV64-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; RV64-NEXT: vand.vx v9, v8, a1, v0.t
+; RV64-NEXT: vsll.vi v9, v9, 24, v0.t
+; RV64-NEXT: li a0, 255
+; RV64-NEXT: slli a0, a0, 24
+; RV64-NEXT: vand.vx v10, v8, a0, v0.t
+; RV64-NEXT: vsll.vi v10, v10, 8, v0.t
+; RV64-NEXT: vor.vv v9, v9, v10, v0.t
+; RV64-NEXT: li a2, 56
+; RV64-NEXT: vsll.vx v10, v8, a2, v0.t
+; RV64-NEXT: lui a3, 16
+; RV64-NEXT: addiw a3, a3, -256
+; RV64-NEXT: vand.vx v11, v8, a3, v0.t
+; RV64-NEXT: li a4, 40
+; RV64-NEXT: vsll.vx v11, v11, a4, v0.t
+; RV64-NEXT: vor.vv v10, v10, v11, v0.t
+; RV64-NEXT: vor.vv v9, v10, v9, v0.t
+; RV64-NEXT: vsrl.vx v10, v8, a2, v0.t
+; RV64-NEXT: vsrl.vx v11, v8, a4, v0.t
+; RV64-NEXT: vand.vx v11, v11, a3, v0.t
+; RV64-NEXT: vor.vv v10, v11, v10, v0.t
+; RV64-NEXT: vsrl.vi v11, v8, 24, v0.t
+; RV64-NEXT: vand.vx v11, v11, a1, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vor.vv v8, v8, v11, v0.t
+; RV64-NEXT: vor.vv v8, v8, v10, v0.t
+; RV64-NEXT: vor.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 16, v0.t
+; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_bswap_nxv1i48:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e64, m1, ta, ma
+; CHECK-ZVBB-NEXT: vrev8.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: vsrl.vi v8, v8, 16, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 1 x i48> @llvm.vp.bswap.nxv1i48(<vscale x 1 x i48> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i48> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
index b768a65a3ddd3a..bb0ff1c2bf09b0 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctlz-vp.ll
@@ -2795,3 +2795,70 @@ define <vscale x 16 x i64> @vp_ctlz_zero_undef_nxv16i64_unmasked(<vscale x 16 x
%v = call <vscale x 16 x i64> @llvm.vp.ctlz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x i64> %v
}
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9>, i1 immarg, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @vp_ctlz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 16
+; CHECK-NEXT: vminu.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: li a1, 511
+; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: li a0, 7
+; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %v
+}
+define <vscale x 1 x i9> @vp_ctlz_zero_undef_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_ctlz_zero_undef_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v8, v8, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 142
+; CHECK-NEXT: vrsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: li a0, 7
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctlz_zero_undef_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: li a1, 511
+; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vclz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: li a0, 7
+; CHECK-ZVBB-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 1 x i9> @llvm.vp.ctlz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
index 7a66148651014b..d47de77fb4c122 100644
--- a/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/ctpop-vp.ll
@@ -3287,3 +3287,73 @@ define <vscale x 16 x i64> @vp_ctpop_nxv16i64_unmasked(<vscale x 16 x i64> %va,
%v = call <vscale x 16 x i64> @llvm.vp.ctpop.nxv16i64(<vscale x 16 x i64> %va, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x i64> %v
}
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i1>, i32)
+
+define <vscale x 1 x i9> @vp_ctpop_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_ctpop_nxv1i9:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 511
+; RV32-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; RV32-NEXT: vand.vx v8, v8, a1
+; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: lui a0, 5
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a0, 3
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: vand.vx v9, v8, a0, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: addi a0, a0, -241
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: li a0, 257
+; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_ctpop_nxv1i9:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 511
+; RV64-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; RV64-NEXT: vand.vx v8, v8, a1
+; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT: lui a0, 5
+; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV64-NEXT: lui a0, 3
+; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: vand.vx v9, v8, a0, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV64-NEXT: lui a0, 1
+; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: li a0, 257
+; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_ctpop_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: li a1, 511
+; CHECK-ZVBB-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vand.vx v8, v8, a1
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vcpop.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 1 x i9> @llvm.vp.ctpop.nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
index 896cd31b439f70..1239529c3e7c9f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/cttz-vp.ll
@@ -5001,3 +5001,100 @@ define <vscale x 16 x i64> @vp_cttz_zero_undef_nxv16i64_unmasked(<vscale x 16 x
%v = call <vscale x 16 x i64> @llvm.vp.cttz.nxv16i64(<vscale x 16 x i64> %va, i1 true, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x i64> %v
}
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9>, i1 immarg, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @vp_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; RV32-LABEL: vp_cttz_nxv1i9:
+; RV32: # %bb.0:
+; RV32-NEXT: li a1, 512
+; RV32-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV32-NEXT: vor.vx v8, v8, a1, v0.t
+; RV32-NEXT: li a0, 1
+; RV32-NEXT: vsub.vx v9, v8, a0, v0.t
+; RV32-NEXT: vnot.v v8, v8, v0.t
+; RV32-NEXT: vand.vv v8, v8, v9, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV32-NEXT: lui a0, 5
+; RV32-NEXT: addi a0, a0, 1365
+; RV32-NEXT: vand.vx v9, v9, a0, v0.t
+; RV32-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a0, 3
+; RV32-NEXT: addi a0, a0, 819
+; RV32-NEXT: vand.vx v9, v8, a0, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV32-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV32-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV32-NEXT: lui a0, 1
+; RV32-NEXT: addi a0, a0, -241
+; RV32-NEXT: vand.vx v8, v8, a0, v0.t
+; RV32-NEXT: li a0, 257
+; RV32-NEXT: vmul.vx v8, v8, a0, v0.t
+; RV32-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV32-NEXT: ret
+;
+; RV64-LABEL: vp_cttz_nxv1i9:
+; RV64: # %bb.0:
+; RV64-NEXT: li a1, 512
+; RV64-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; RV64-NEXT: vor.vx v8, v8, a1, v0.t
+; RV64-NEXT: li a0, 1
+; RV64-NEXT: vsub.vx v9, v8, a0, v0.t
+; RV64-NEXT: vnot.v v8, v8, v0.t
+; RV64-NEXT: vand.vv v8, v8, v9, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 1, v0.t
+; RV64-NEXT: lui a0, 5
+; RV64-NEXT: addiw a0, a0, 1365
+; RV64-NEXT: vand.vx v9, v9, a0, v0.t
+; RV64-NEXT: vsub.vv v8, v8, v9, v0.t
+; RV64-NEXT: lui a0, 3
+; RV64-NEXT: addiw a0, a0, 819
+; RV64-NEXT: vand.vx v9, v8, a0, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 2, v0.t
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: vadd.vv v8, v9, v8, v0.t
+; RV64-NEXT: vsrl.vi v9, v8, 4, v0.t
+; RV64-NEXT: vadd.vv v8, v8, v9, v0.t
+; RV64-NEXT: lui a0, 1
+; RV64-NEXT: addiw a0, a0, -241
+; RV64-NEXT: vand.vx v8, v8, a0, v0.t
+; RV64-NEXT: li a0, 257
+; RV64-NEXT: vmul.vx v8, v8, a0, v0.t
+; RV64-NEXT: vsrl.vi v8, v8, 8, v0.t
+; RV64-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_cttz_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: li a1, 512
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vor.vx v8, v8, a1, v0.t
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9> %va, i1 false, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %v
+}
+define <vscale x 1 x i9> @vp_zero_undef_cttz_nxv1i9(<vscale x 1 x i9> %va, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: vp_zero_undef_cttz_nxv1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vrsub.vi v9, v8, 0, v0.t
+; CHECK-NEXT: vand.vv v8, v8, v9, v0.t
+; CHECK-NEXT: vfwcvt.f.xu.v v9, v8, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
+; CHECK-NEXT: vsrl.vi v8, v9, 23, v0.t
+; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vnsrl.wi v8, v8, 0, v0.t
+; CHECK-NEXT: li a0, 127
+; CHECK-NEXT: vsub.vx v8, v8, a0, v0.t
+; CHECK-NEXT: ret
+;
+; CHECK-ZVBB-LABEL: vp_zero_undef_cttz_nxv1i9:
+; CHECK-ZVBB: # %bb.0:
+; CHECK-ZVBB-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-ZVBB-NEXT: vctz.v v8, v8, v0.t
+; CHECK-ZVBB-NEXT: ret
+ %v = call <vscale x 1 x i9> @llvm.vp.cttz.nxv1i9(<vscale x 1 x i9> %va, i1 true, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %v
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
index fe897f52c23085..0b705eda2d913f 100644
--- a/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/fshr-fshl-vp.ll
@@ -1330,3 +1330,51 @@ define <vscale x 16 x i64> @fshl_v16i64(<vscale x 16 x i64> %a, <vscale x 16 x i
%res = call <vscale x 16 x i64> @llvm.vp.fshl.nxv16i64(<vscale x 16 x i64> %a, <vscale x 16 x i64> %b, <vscale x 16 x i64> %c, <vscale x 16 x i1> %m, i32 %evl)
ret <vscale x 16 x i64> %res
}
+
+; Test promotion.
+declare <vscale x 1 x i9> @llvm.vp.fshr.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i9>, <vscale x 1 x i9>, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @fshr_v1i9(<vscale x 1 x i9> %a, <vscale x 1 x i9> %b, <vscale x 1 x i9> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fshr_v1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v10, v10, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t
+; CHECK-NEXT: li a0, 9
+; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vadd.vi v10, v10, 7, v0.t
+; CHECK-NEXT: vand.vi v11, v10, 15, v0.t
+; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t
+; CHECK-NEXT: vsll.vi v8, v8, 1, v0.t
+; CHECK-NEXT: vnot.v v10, v10, v0.t
+; CHECK-NEXT: vand.vi v10, v10, 15, v0.t
+; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %res = call <vscale x 1 x i9> @llvm.vp.fshr.nxv1i9(<vscale x 1 x i9> %a, <vscale x 1 x i9> %b, <vscale x 1 x i9> %c, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %res
+}
+
+declare <vscale x 1 x i9> @llvm.vp.fshl.nxv1i9(<vscale x 1 x i9>, <vscale x 1 x i9>, <vscale x 1 x i9>, <vscale x 1 x i1>, i32)
+define <vscale x 1 x i9> @fshl_v1i9(<vscale x 1 x i9> %a, <vscale x 1 x i9> %b, <vscale x 1 x i9> %c, <vscale x 1 x i1> %m, i32 zeroext %evl) {
+; CHECK-LABEL: fshl_v1i9:
+; CHECK: # %bb.0:
+; CHECK-NEXT: li a1, 511
+; CHECK-NEXT: vsetvli a2, zero, e16, mf4, ta, ma
+; CHECK-NEXT: vand.vx v10, v10, a1
+; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
+; CHECK-NEXT: vsll.vi v9, v9, 7, v0.t
+; CHECK-NEXT: vsrl.vi v9, v9, 1, v0.t
+; CHECK-NEXT: li a0, 9
+; CHECK-NEXT: vremu.vx v10, v10, a0, v0.t
+; CHECK-NEXT: vnot.v v11, v10, v0.t
+; CHECK-NEXT: vand.vi v11, v11, 15, v0.t
+; CHECK-NEXT: vsrl.vv v9, v9, v11, v0.t
+; CHECK-NEXT: vand.vi v10, v10, 15, v0.t
+; CHECK-NEXT: vsll.vv v8, v8, v10, v0.t
+; CHECK-NEXT: vor.vv v8, v8, v9, v0.t
+; CHECK-NEXT: ret
+ %res = call <vscale x 1 x i9> @llvm.vp.fshl.nxv1i9(<vscale x 1 x i9> %a, <vscale x 1 x i9> %b, <vscale x 1 x i9> %c, <vscale x 1 x i1> %m, i32 %evl)
+ ret <vscale x 1 x i9> %res
+}