[llvm] [InstCombine][RISCV] Convert VPIntrinsics with splat operands to splats of the scalar operation (PR #65706)
Michael Maitland via llvm-commits
llvm-commits at lists.llvm.org
Thu Sep 7 20:16:07 PDT 2023
https://github.com/michaelmaitland created https://github.com/llvm/llvm-project/pull/65706:
VP intrinsics whose vector operands are both splat values may be simplified into the scalar version of the operation, with the result splatted back into a vector. This simplification can enable scalarization during CodeGen.
This patch is the intrinsic dual of #65072: it scalarizes non-legal types when the operations are VP intrinsics.
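As a sketch of the fold (hand-written here for illustration; the value names are hypothetical and not taken from the patch's test file), an all-ones-mask vp.add whose operands are both splats, such as

  %r = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %s0, <vscale x 1 x i64> %s1, <vscale x 1 x i1> %allones, i32 %evl)

can instead perform one scalar add and splat the result (for scalable vectors, CreateVectorSplat emits an insertelement followed by a zeroinitializer shufflevector):

  %e0 = extractelement <vscale x 1 x i64> %s0, i64 0
  %e1 = extractelement <vscale x 1 x i64> %s1, i64 0
  %sum = add i64 %e0, %e1
  %ins = insertelement <vscale x 1 x i64> poison, i64 %sum, i64 0
  %r = shufflevector <vscale x 1 x i64> %ins, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer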
From f118ff11115c8c2289466e36fecc65ed36fcb879 Mon Sep 17 00:00:00 2001
From: Michael Maitland <michaeltmaitland at gmail.com>
Date: Thu, 7 Sep 2023 20:02:15 -0700
Subject: [PATCH] [InstCombine][RISCV] Convert VPIntrinsics with splat operands
to splats of the scalar operation
VP intrinsics whose vector operands are both splat values may be simplified
into the scalar version of the operation, with the result splatted back into
a vector. This simplification can enable scalarization during CodeGen.
This patch is the intrinsic dual of #65072: it scalarizes non-legal types
when the operations are VP intrinsics.
---
.../InstCombine/InstCombineCalls.cpp | 108 +++
.../RISCV/rvv/vpbinops-scalarization.ll | 896 ++++++++++++++++++
2 files changed, 1004 insertions(+)
create mode 100644 llvm/test/CodeGen/RISCV/rvv/vpbinops-scalarization.ll
diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index b493dff23fc0bf4..b8e69aceaaf5075 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -1399,6 +1399,110 @@ static Instruction *foldBitOrderCrossLogicOp(Value *V,
return nullptr;
}
+/// VP intrinsics whose vector operands are both splat values may be simplified
+/// into the scalar version of the operation, with the result splatted. This
+/// can enable scalarization down the line.
+static Value *convertOpOfSplatsToSplatOfOp(VPIntrinsic *VPI,
+ InstCombiner::BuilderTy &Builder) {
+ Value *Op0 = VPI->getArgOperand(0);
+ Value *Op1 = VPI->getArgOperand(1);
+
+ if (!isSplatValue(Op0) || !isSplatValue(Op1))
+ return nullptr;
+
+ // For the binary VP intrinsics supported here, the result on disabled lanes
+ // is a poison value. For now, only do this simplification if all lanes
+ // are active.
+ // TODO: Relax the condition that all lanes are active by using insertelement
+ // on inactive lanes.
+ Value *Mask = VPI->getArgOperand(2);
+ if (!maskIsAllOneOrUndef(Mask))
+ return nullptr;
+
+ Value *EVL = VPI->getArgOperand(3);
+ auto SplatAndPoison = [&Builder, &Op0, &EVL](Value *V) {
+ ElementCount EC = cast<VectorType>(Op0->getType())->getElementCount();
+ // FIXME: Do we need to poison out all lanes past EVL, since the semantics
+ // of these intrinsics are that inactive lanes are poison?
+ return Builder.CreateVectorSplat(EC, V);
+ };
+ switch (VPI->getIntrinsicID()) {
+ case Intrinsic::vp_add:
+ return SplatAndPoison(
+ Builder.CreateAdd(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_sub:
+ return SplatAndPoison(
+ Builder.CreateSub(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_mul:
+ return SplatAndPoison(
+ Builder.CreateMul(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_sdiv:
+ return SplatAndPoison(
+ Builder.CreateSDiv(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_udiv:
+ return SplatAndPoison(
+ Builder.CreateUDiv(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_srem:
+ return SplatAndPoison(
+ Builder.CreateSRem(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_urem:
+ return SplatAndPoison(
+ Builder.CreateURem(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_ashr:
+ return SplatAndPoison(
+ Builder.CreateAShr(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_lshr:
+ return SplatAndPoison(
+ Builder.CreateLShr(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_shl:
+ return SplatAndPoison(
+ Builder.CreateShl(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_or:
+ return SplatAndPoison(
+ Builder.CreateOr(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_and:
+ return SplatAndPoison(
+ Builder.CreateAnd(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_xor:
+ return SplatAndPoison(
+ Builder.CreateXor(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_fadd:
+ return SplatAndPoison(
+ Builder.CreateFAdd(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_fsub:
+ return SplatAndPoison(
+ Builder.CreateFSub(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_fmul:
+ return SplatAndPoison(
+ Builder.CreateFMul(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_fdiv:
+ return SplatAndPoison(
+ Builder.CreateFDiv(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ case Intrinsic::vp_frem:
+ return SplatAndPoison(
+ Builder.CreateFRem(Builder.CreateExtractElement(Op0, (uint64_t)0),
+ Builder.CreateExtractElement(Op1, (uint64_t)0)));
+ }
+ return nullptr;
+}
+
/// CallInst simplification. This mostly only handles folding of intrinsic
/// instructions. For normal calls, it allows visitCallBase to do the heavy
/// lifting.
@@ -1521,6 +1625,10 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
return eraseInstFromFunction(CI);
}
+ if (VPIntrinsic *VPI = dyn_cast<VPIntrinsic>(II))
+ if (Value *V = convertOpOfSplatsToSplatOfOp(VPI, Builder))
+ return replaceInstUsesWith(*II, V);
+
Intrinsic::ID IID = II->getIntrinsicID();
switch (IID) {
case Intrinsic::objectsize: {
diff --git a/llvm/test/CodeGen/RISCV/rvv/vpbinops-scalarization.ll b/llvm/test/CodeGen/RISCV/rvv/vpbinops-scalarization.ll
new file mode 100644
index 000000000000000..b45f88a0290ac79
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vpbinops-scalarization.ll
@@ -0,0 +1,896 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: opt -S -passes=instcombine,vector-combine %s | llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs | FileCheck %s --check-prefixes=INST-COMBINE,BOTH
+; RUN: llc -mtriple=riscv64 -mattr=+v -verify-machineinstrs < %s | FileCheck %s --check-prefixes=NO-INST-COMBINE,BOTH
+
+declare <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64>, <vscale x 1 x i64>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
+declare <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float>, <vscale x 1 x float>, <vscale x 1 x i1>, i32)
+
+declare <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+declare <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>, <vscale x 8 x i1>, i32)
+
+define <vscale x 1 x i64> @add_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: add_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: addi a0, a0, 42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: add_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vadd.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @add_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: add_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vadd.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.add.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @sub_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: sub_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: addi a0, a0, -42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: sub_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vsub.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @sub_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: sub_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vsub.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.sub.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @mul_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: mul_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: addi sp, sp, -32
+; INST-COMBINE-NEXT: .cfi_def_cfa_offset 32
+; INST-COMBINE-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: .cfi_offset ra, -8
+; INST-COMBINE-NEXT: .cfi_offset s0, -16
+; INST-COMBINE-NEXT: csrr a2, vlenb
+; INST-COMBINE-NEXT: slli a2, a2, 1
+; INST-COMBINE-NEXT: sub sp, sp, a2
+; INST-COMBINE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; INST-COMBINE-NEXT: mv s0, a1
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; INST-COMBINE-NEXT: li a1, 42
+; INST-COMBINE-NEXT: call __muldi3@plt
+; INST-COMBINE-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: csrr a0, vlenb
+; INST-COMBINE-NEXT: slli a0, a0, 1
+; INST-COMBINE-NEXT: add sp, sp, a0
+; INST-COMBINE-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: addi sp, sp, 32
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: mul_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmul.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @mul_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: mul_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vmul.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @sdiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: sdiv_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: addi sp, sp, -32
+; INST-COMBINE-NEXT: .cfi_def_cfa_offset 32
+; INST-COMBINE-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: .cfi_offset ra, -8
+; INST-COMBINE-NEXT: .cfi_offset s0, -16
+; INST-COMBINE-NEXT: csrr a2, vlenb
+; INST-COMBINE-NEXT: slli a2, a2, 1
+; INST-COMBINE-NEXT: sub sp, sp, a2
+; INST-COMBINE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; INST-COMBINE-NEXT: mv s0, a1
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; INST-COMBINE-NEXT: li a1, 42
+; INST-COMBINE-NEXT: call __divdi3@plt
+; INST-COMBINE-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: csrr a0, vlenb
+; INST-COMBINE-NEXT: slli a0, a0, 1
+; INST-COMBINE-NEXT: add sp, sp, a0
+; INST-COMBINE-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: addi sp, sp, 32
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: sdiv_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vdiv.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @sdiv_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: sdiv_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vdiv.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.sdiv.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @udiv_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: udiv_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: addi sp, sp, -32
+; INST-COMBINE-NEXT: .cfi_def_cfa_offset 32
+; INST-COMBINE-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: .cfi_offset ra, -8
+; INST-COMBINE-NEXT: .cfi_offset s0, -16
+; INST-COMBINE-NEXT: csrr a2, vlenb
+; INST-COMBINE-NEXT: slli a2, a2, 1
+; INST-COMBINE-NEXT: sub sp, sp, a2
+; INST-COMBINE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; INST-COMBINE-NEXT: mv s0, a1
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; INST-COMBINE-NEXT: li a1, 42
+; INST-COMBINE-NEXT: call __udivdi3@plt
+; INST-COMBINE-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: csrr a0, vlenb
+; INST-COMBINE-NEXT: slli a0, a0, 1
+; INST-COMBINE-NEXT: add sp, sp, a0
+; INST-COMBINE-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: addi sp, sp, 32
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: udiv_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vdivu.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @udiv_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: udiv_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vdivu.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.udiv.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @srem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: srem_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: addi sp, sp, -32
+; INST-COMBINE-NEXT: .cfi_def_cfa_offset 32
+; INST-COMBINE-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: .cfi_offset ra, -8
+; INST-COMBINE-NEXT: .cfi_offset s0, -16
+; INST-COMBINE-NEXT: csrr a2, vlenb
+; INST-COMBINE-NEXT: slli a2, a2, 1
+; INST-COMBINE-NEXT: sub sp, sp, a2
+; INST-COMBINE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; INST-COMBINE-NEXT: mv s0, a1
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; INST-COMBINE-NEXT: li a1, 42
+; INST-COMBINE-NEXT: call __moddi3@plt
+; INST-COMBINE-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: csrr a0, vlenb
+; INST-COMBINE-NEXT: slli a0, a0, 1
+; INST-COMBINE-NEXT: add sp, sp, a0
+; INST-COMBINE-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: addi sp, sp, 32
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: srem_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vrem.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @srem_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: srem_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vrem.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.srem.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @urem_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: urem_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: addi sp, sp, -32
+; INST-COMBINE-NEXT: .cfi_def_cfa_offset 32
+; INST-COMBINE-NEXT: sd ra, 24(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: sd s0, 16(sp) # 8-byte Folded Spill
+; INST-COMBINE-NEXT: .cfi_offset ra, -8
+; INST-COMBINE-NEXT: .cfi_offset s0, -16
+; INST-COMBINE-NEXT: csrr a2, vlenb
+; INST-COMBINE-NEXT: slli a2, a2, 1
+; INST-COMBINE-NEXT: sub sp, sp, a2
+; INST-COMBINE-NEXT: .cfi_escape 0x0f, 0x0d, 0x72, 0x00, 0x11, 0x20, 0x22, 0x11, 0x02, 0x92, 0xa2, 0x38, 0x00, 0x1e, 0x22 # sp + 32 + 2 * vlenb
+; INST-COMBINE-NEXT: mv s0, a1
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vs1r.v v8, (a1) # Unknown-size Folded Spill
+; INST-COMBINE-NEXT: li a1, 42
+; INST-COMBINE-NEXT: call __umoddi3@plt
+; INST-COMBINE-NEXT: vsetvli zero, s0, e64, m1, ta, ma
+; INST-COMBINE-NEXT: addi a1, sp, 16
+; INST-COMBINE-NEXT: vl1r.v v8, (a1) # Unknown-size Folded Reload
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: csrr a0, vlenb
+; INST-COMBINE-NEXT: slli a0, a0, 1
+; INST-COMBINE-NEXT: add sp, sp, a0
+; INST-COMBINE-NEXT: ld ra, 24(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: ld s0, 16(sp) # 8-byte Folded Reload
+; INST-COMBINE-NEXT: addi sp, sp, 32
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: urem_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vremu.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @urem_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: urem_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vremu.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.urem.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @ashr_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: ashr_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: srai a0, a0, 42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: ashr_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vsra.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @ashr_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: ashr_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vsra.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.ashr.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @lshr_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: lshr_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: srli a0, a0, 42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: lshr_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vsrl.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @lshr_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: lshr_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vsrl.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.lshr.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @shl_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: shl_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: slli a0, a0, 42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: shl_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vsll.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @shl_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: shl_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vsll.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.shl.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @or_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: or_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: ori a0, a0, 42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: or_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vor.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @or_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: or_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vor.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.or.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @and_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: and_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: andi a0, a0, 42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: and_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vand.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @and_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: and_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vand.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.and.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @xor_nxv1i64_allonesmask(<vscale x 1 x i64> %x, i64 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: xor_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: xori a0, a0, 42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: xor_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vxor.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x i64> @xor_nxv1i64_anymask(<vscale x 1 x i64> %x, i64 %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: xor_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e64, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e64, m1, ta, ma
+; BOTH-NEXT: vxor.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x i64> poison, i64 %y, i32 0
+ %2 = shufflevector <vscale x 1 x i64> %1, <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x i64> @llvm.vp.xor.nxv1i64(<vscale x 1 x i64> %2, <vscale x 1 x i64> shufflevector(<vscale x 1 x i64> insertelement(<vscale x 1 x i64> poison, i64 42, i32 0), <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x i64> @llvm.vp.mul.nxv1i64(<vscale x 1 x i64> %x, <vscale x 1 x i64> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x i64> %4
+}
+
+define <vscale x 1 x float> @fadd_nxv1i64_allonesmask(<vscale x 1 x float> %x, float %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: fadd_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: lui a1, 270976
+; INST-COMBINE-NEXT: fmv.w.x fa5, a1
+; INST-COMBINE-NEXT: fadd.s fa5, fa0, fa5
+; INST-COMBINE-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; INST-COMBINE-NEXT: vfadd.vf v8, v8, fa5
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: fadd_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; NO-INST-COMBINE-NEXT: vfmv.v.f v9, fa0
+; NO-INST-COMBINE-NEXT: lui a1, 270976
+; NO-INST-COMBINE-NEXT: fmv.w.x fa5, a1
+; NO-INST-COMBINE-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; NO-INST-COMBINE-NEXT: vfadd.vf v9, v9, fa5
+; NO-INST-COMBINE-NEXT: vfadd.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x float> poison, float %y, i32 0
+ %2 = shufflevector <vscale x 1 x float> %1, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %2, <vscale x 1 x float> shufflevector(<vscale x 1 x float> insertelement(<vscale x 1 x float> poison, float 42.0, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x float> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x float> %4
+}
+
+define <vscale x 1 x float> @fadd_nxv1i64_anymask(<vscale x 1 x float> %x, float %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: fadd_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; BOTH-NEXT: vfmv.v.f v9, fa0
+; BOTH-NEXT: lui a1, 270976
+; BOTH-NEXT: fmv.w.x fa5, a1
+; BOTH-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; BOTH-NEXT: vfadd.vf v9, v9, fa5, v0.t
+; BOTH-NEXT: vfadd.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x float> poison, float %y, i32 0
+ %2 = shufflevector <vscale x 1 x float> %1, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %2, <vscale x 1 x float> shufflevector(<vscale x 1 x float> insertelement(<vscale x 1 x float> poison, float 42.0, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x float> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x float> %4
+}
+
+define <vscale x 1 x float> @fsub_nxv1i64_allonesmask(<vscale x 1 x float> %x, float %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: fsub_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: lui a1, 795264
+; INST-COMBINE-NEXT: fmv.w.x fa5, a1
+; INST-COMBINE-NEXT: fadd.s fa5, fa0, fa5
+; INST-COMBINE-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; INST-COMBINE-NEXT: vfadd.vf v8, v8, fa5
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: fsub_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; NO-INST-COMBINE-NEXT: vfmv.v.f v9, fa0
+; NO-INST-COMBINE-NEXT: lui a1, 270976
+; NO-INST-COMBINE-NEXT: fmv.w.x fa5, a1
+; NO-INST-COMBINE-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; NO-INST-COMBINE-NEXT: vfsub.vf v9, v9, fa5
+; NO-INST-COMBINE-NEXT: vfadd.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x float> poison, float %y, i32 0
+ %2 = shufflevector <vscale x 1 x float> %1, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> %2, <vscale x 1 x float> shufflevector(<vscale x 1 x float> insertelement(<vscale x 1 x float> poison, float 42.0, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x float> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x float> %4
+}
+
+define <vscale x 1 x float> @fsub_nxv1i64_anymask(<vscale x 1 x float> %x, float %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: fsub_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; BOTH-NEXT: vfmv.v.f v9, fa0
+; BOTH-NEXT: lui a1, 270976
+; BOTH-NEXT: fmv.w.x fa5, a1
+; BOTH-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; BOTH-NEXT: vfsub.vf v9, v9, fa5, v0.t
+; BOTH-NEXT: vfadd.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x float> poison, float %y, i32 0
+ %2 = shufflevector <vscale x 1 x float> %1, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x float> @llvm.vp.fsub.nxv1f32(<vscale x 1 x float> %2, <vscale x 1 x float> shufflevector(<vscale x 1 x float> insertelement(<vscale x 1 x float> poison, float 42.0, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x float> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x float> %4
+}
+
+define <vscale x 1 x float> @fdiv_nxv1i64_allonesmask(<vscale x 1 x float> %x, float %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: fdiv_nxv1i64_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: lui a1, 270976
+; INST-COMBINE-NEXT: fmv.w.x fa5, a1
+; INST-COMBINE-NEXT: fdiv.s fa5, fa0, fa5
+; INST-COMBINE-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; INST-COMBINE-NEXT: vfadd.vf v8, v8, fa5
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: fdiv_nxv1i64_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; NO-INST-COMBINE-NEXT: vfmv.v.f v9, fa0
+; NO-INST-COMBINE-NEXT: lui a1, 270976
+; NO-INST-COMBINE-NEXT: fmv.w.x fa5, a1
+; NO-INST-COMBINE-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; NO-INST-COMBINE-NEXT: vfdiv.vf v9, v9, fa5
+; NO-INST-COMBINE-NEXT: vfadd.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+ %1 = insertelement <vscale x 1 x float> poison, float %y, i32 0
+ %2 = shufflevector <vscale x 1 x float> %1, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> %2, <vscale x 1 x float> shufflevector(<vscale x 1 x float> insertelement(<vscale x 1 x float> poison, float 42.0, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x float> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x float> %4
+}
+
+define <vscale x 1 x float> @fdiv_nxv1i64_anymask(<vscale x 1 x float> %x, float %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: fdiv_nxv1i64_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a1, zero, e32, mf2, ta, ma
+; BOTH-NEXT: vfmv.v.f v9, fa0
+; BOTH-NEXT: lui a1, 270976
+; BOTH-NEXT: fmv.w.x fa5, a1
+; BOTH-NEXT: vsetvli zero, a0, e32, mf2, ta, ma
+; BOTH-NEXT: vfdiv.vf v9, v9, fa5, v0.t
+; BOTH-NEXT: vfadd.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 1 x float> poison, float %y, i32 0
+ %2 = shufflevector <vscale x 1 x float> %1, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+ %3 = call <vscale x 1 x float> @llvm.vp.fdiv.nxv1f32(<vscale x 1 x float> %2, <vscale x 1 x float> shufflevector(<vscale x 1 x float> insertelement(<vscale x 1 x float> poison, float 42.0, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x float> %3, <vscale x 1 x i1> %mask, i32 %evl)
+ ret <vscale x 1 x float> %4
+}
+
+; Need to fix crash in SelectionDAG before I can uncomment
+; define <vscale x 1 x float> @frem_nxv1i64_allonesmask(<vscale x 1 x float> %x, float %y, i32 zeroext %evl) {
+; %splat = insertelement <vscale x 1 x i1> poison, i1 -1, i32 0
+; %mask = shufflevector <vscale x 1 x i1> %splat, <vscale x 1 x i1> poison, <vscale x 1 x i32> zeroinitializer
+; %1 = insertelement <vscale x 1 x float> poison, float %y, i32 0
+; %2 = shufflevector <vscale x 1 x float> %1, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+; %3 = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> %2, <vscale x 1 x float> shufflevector(<vscale x 1 x float> insertelement(<vscale x 1 x float> poison, float 42.0, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+; %4 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x float> %3, <vscale x 1 x i1> %mask, i32 %evl)
+; ret <vscale x 1 x float> %4
+; }
+;
+; define <vscale x 1 x float> @frem_nxv1i64_anymask(<vscale x 1 x float> %x, float %y, <vscale x 1 x i1> %mask, i32 zeroext %evl) {
+; %1 = insertelement <vscale x 1 x float> poison, float %y, i32 0
+; %2 = shufflevector <vscale x 1 x float> %1, <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer
+; %3 = call <vscale x 1 x float> @llvm.vp.frem.nxv1f32(<vscale x 1 x float> %2, <vscale x 1 x float> shufflevector(<vscale x 1 x float> insertelement(<vscale x 1 x float> poison, float 42.0, i32 0), <vscale x 1 x float> poison, <vscale x 1 x i32> zeroinitializer), <vscale x 1 x i1> %mask, i32 %evl)
+; %4 = call <vscale x 1 x float> @llvm.vp.fadd.nxv1f32(<vscale x 1 x float> %x, <vscale x 1 x float> %3, <vscale x 1 x i1> %mask, i32 %evl)
+; ret <vscale x 1 x float> %4
+; }
+
+define <vscale x 8 x i8> @add_nxv8i8_allonesmask(<vscale x 8 x i8> %x, i8 %y, i32 zeroext %evl) {
+; INST-COMBINE-LABEL: add_nxv8i8_allonesmask:
+; INST-COMBINE: # %bb.0:
+; INST-COMBINE-NEXT: addi a0, a0, 42
+; INST-COMBINE-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; INST-COMBINE-NEXT: vmul.vx v8, v8, a0
+; INST-COMBINE-NEXT: ret
+;
+; NO-INST-COMBINE-LABEL: add_nxv8i8_allonesmask:
+; NO-INST-COMBINE: # %bb.0:
+; NO-INST-COMBINE-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vmv.v.x v9, a0
+; NO-INST-COMBINE-NEXT: li a0, 42
+; NO-INST-COMBINE-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; NO-INST-COMBINE-NEXT: vadd.vx v9, v9, a0
+; NO-INST-COMBINE-NEXT: vmul.vv v8, v8, v9
+; NO-INST-COMBINE-NEXT: ret
+ %splat = insertelement <vscale x 8 x i1> poison, i1 -1, i32 0
+ %mask = shufflevector <vscale x 8 x i1> %splat, <vscale x 8 x i1> poison, <vscale x 8 x i32> zeroinitializer
+ %1 = insertelement <vscale x 8 x i8> poison, i8 %y, i32 0
+ %2 = shufflevector <vscale x 8 x i8> %1, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %3 = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %2, <vscale x 8 x i8> shufflevector(<vscale x 8 x i8> insertelement(<vscale x 8 x i8> poison, i8 42, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %3, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i8> %4
+}
+
+define <vscale x 8 x i8> @add_nxv8i8_anymask(<vscale x 8 x i8> %x, i8 %y, <vscale x 8 x i1> %mask, i32 zeroext %evl) {
+; BOTH-LABEL: add_nxv8i8_anymask:
+; BOTH: # %bb.0:
+; BOTH-NEXT: vsetvli a2, zero, e8, m1, ta, ma
+; BOTH-NEXT: vmv.v.x v9, a0
+; BOTH-NEXT: li a0, 42
+; BOTH-NEXT: vsetvli zero, a1, e8, m1, ta, ma
+; BOTH-NEXT: vadd.vx v9, v9, a0, v0.t
+; BOTH-NEXT: vmul.vv v8, v8, v9, v0.t
+; BOTH-NEXT: ret
+ %1 = insertelement <vscale x 8 x i8> poison, i8 %y, i32 0
+ %2 = shufflevector <vscale x 8 x i8> %1, <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer
+ %3 = call <vscale x 8 x i8> @llvm.vp.add.nxv8i8(<vscale x 8 x i8> %2, <vscale x 8 x i8> shufflevector(<vscale x 8 x i8> insertelement(<vscale x 8 x i8> poison, i8 42, i32 0), <vscale x 8 x i8> poison, <vscale x 8 x i32> zeroinitializer), <vscale x 8 x i1> %mask, i32 %evl)
+ %4 = call <vscale x 8 x i8> @llvm.vp.mul.nxv8i8(<vscale x 8 x i8> %x, <vscale x 8 x i8> %3, <vscale x 8 x i1> %mask, i32 %evl)
+ ret <vscale x 8 x i8> %4
+}