[llvm] 946bc50 - [RISCV] Define the vfsqrt RVV intrinsics
Evandro Menezes via llvm-commits
llvm-commits at lists.llvm.org
Thu Jan 7 15:42:36 PST 2021
Author: Evandro Menezes
Date: 2021-01-07T17:29:29-06:00
New Revision: 946bc50e4cbbc998c77e091c7956e996a7d409f0
URL: https://github.com/llvm/llvm-project/commit/946bc50e4cbbc998c77e091c7956e996a7d409f0
DIFF: https://github.com/llvm/llvm-project/commit/946bc50e4cbbc998c77e091c7956e996a7d409f0.diff
LOG: [RISCV] Define the vfsqrt RVV intrinsics
Define the `vfsqrt` IR intrinsics for the respective V instructions.
Authored-by: Roger Ferrer Ibanez <rofirrim at gmail.com>
Co-Authored-by: Evandro Menezes <evandro.menezes at sifive.com>
Differential Revision: https://reviews.llvm.org/D93745
Added:
llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
Modified:
llvm/include/llvm/IR/IntrinsicsRISCV.td
llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
Removed:
################################################################################
diff --git a/llvm/include/llvm/IR/IntrinsicsRISCV.td b/llvm/include/llvm/IR/IntrinsicsRISCV.td
index fe0d6b00a3c2..a28f8eb5ab08 100644
--- a/llvm/include/llvm/IR/IntrinsicsRISCV.td
+++ b/llvm/include/llvm/IR/IntrinsicsRISCV.td
@@ -189,6 +189,19 @@ let TargetPrefix = "riscv" in {
LLVMPointerType<LLVMMatchType<0>>, llvm_anyvector_ty,
LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
[NoCapture<ArgIndex<1>>, IntrWriteMem]>, RISCVVIntrinsic;
+ // For destination vector type is the same as source vector.
+ // Input: (vector_in, vl)
+ class RISCVUnaryAANoMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
+ // For destination vector type is the same as first source vector (with mask).
+ // Input: (maskedoff, vector_in, mask, vl)
+ class RISCVUnaryAAMask
+ : Intrinsic<[llvm_anyvector_ty],
+ [LLVMMatchType<0>, LLVMMatchType<0>,
+ LLVMScalarOrSameVectorWidth<0, llvm_i1_ty>, llvm_anyint_ty],
+ [IntrNoMem]>, RISCVVIntrinsic;
// For destination vector type is the same as first and second source vector.
// Input: (vector_in, vector_in, vl)
class RISCVBinaryAAANoMask
@@ -210,7 +223,6 @@ let TargetPrefix = "riscv" in {
[IntrNoMem]>, RISCVVIntrinsic {
let ExtendOperand = 2;
}
-
// For destination vector type is the same as first source vector (with mask).
// Input: (maskedoff, vector_in, vector_in/scalar_in, mask, vl)
class RISCVBinaryAAXMask
@@ -326,7 +338,6 @@ let TargetPrefix = "riscv" in {
[IntrNoMem, IntrHasSideEffects]>, RISCVVIntrinsic {
let ExtendOperand = 3;
}
-
class RISCVTernaryAAAXNoMask
: Intrinsic<[llvm_anyvector_ty],
[LLVMMatchType<0>, LLVMMatchType<0>, llvm_anyint_ty,
@@ -470,7 +481,10 @@ let TargetPrefix = "riscv" in {
def "int_riscv_" # NAME : RISCVIStore;
def "int_riscv_" # NAME # "_mask" : RISCVIStoreMask;
}
-
+ multiclass RISCVUnaryAA {
+ def "int_riscv_" # NAME : RISCVUnaryAANoMask;
+ def "int_riscv_" # NAME # "_mask" : RISCVUnaryAAMask;
+ }
// AAX means the destination type(A) is the same as the first source
// type(A). X means any type for the second source operand.
multiclass RISCVBinaryAAX {
@@ -685,6 +699,8 @@ let TargetPrefix = "riscv" in {
defm vfwmsac : RISCVTernaryWide;
defm vfwnmsac : RISCVTernaryWide;
+ defm vfsqrt : RISCVUnaryAA;
+
defm vfmin : RISCVBinaryAAX;
defm vfmax : RISCVBinaryAAX;
diff --git a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
index 62d887524950..2557b49f0c1c 100644
--- a/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
+++ b/llvm/lib/Target/RISCV/RISCVInstrInfoVPseudos.td
@@ -1097,6 +1097,15 @@ multiclass VPseudoUnaryV_F_NoDummyMask {
}
}
+multiclass VPseudoUnaryV_V {
+ foreach m = MxList.m in {
+ let VLMul = m.value in {
+ def "_V_" # m.MX : VPseudoUnaryNoMask<m.vrclass, m.vrclass>;
+ def "_V_" # m.MX # "_MASK" : VPseudoUnaryMask<m.vrclass, m.vrclass>;
+ }
+ }
+}
+
multiclass PseudoUnaryV_VF2 {
defvar constraints = "@earlyclobber $rd";
foreach m = MxList.m[1-6] in
@@ -1688,6 +1697,18 @@ multiclass VPatUnaryV_VF<string intrinsic, string instruction, string suffix,
}
}
+multiclass VPatUnaryV_V<string intrinsic, string instruction,
+ list<VTypeInfo> vtilist> {
+ foreach vti = vtilist in {
+ def : VPatUnaryNoMask<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector,
+ vti.SEW, vti.LMul, vti.RegClass>;
+ def : VPatUnaryMask<intrinsic, instruction, "V",
+ vti.Vector, vti.Vector, vti.Mask,
+ vti.SEW, vti.LMul, vti.RegClass, vti.RegClass>;
+ }
+}
+
multiclass VPatNullaryV<string intrinsic, string instruction>
{
foreach vti = AllIntegerVectors in {
@@ -1712,7 +1733,6 @@ multiclass VPatNullaryM<string intrinsic, string inst> {
(NoX0 GPR:$vl), mti.SEW)>;
}
-
multiclass VPatBinary<string intrinsic,
string inst,
string kind,
@@ -2574,7 +2594,6 @@ defm PseudoVMERGE : VPseudoBinaryV_VM_XM_IM;
//===----------------------------------------------------------------------===//
// 12.17. Vector Integer Move Instructions
//===----------------------------------------------------------------------===//
-
defm PseudoVMV_V : VPseudoUnaryV_V_X_I_NoDummyMask;
//===----------------------------------------------------------------------===//
@@ -2670,6 +2689,11 @@ defm PseudoVFWNMACC : VPseudoTernaryW_VV_VX</*IsFloat*/true>;
defm PseudoVFWMSAC : VPseudoTernaryW_VV_VX</*IsFloat*/true>;
defm PseudoVFWNMSAC : VPseudoTernaryW_VV_VX</*IsFloat*/true>;
+//===----------------------------------------------------------------------===//
+// 14.8. Vector Floating-Point Square-Root Instruction
+//===----------------------------------------------------------------------===//
+defm PseudoVFSQRT : VPseudoUnaryV_V;
+
//===----------------------------------------------------------------------===//
// 14.11. Vector Floating-Point Min/Max Instructions
//===----------------------------------------------------------------------===//
@@ -3306,6 +3330,11 @@ defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmacc", "PseudoVFWNMACC", AllWidenabl
defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwmsac", "PseudoVFWMSAC", AllWidenableFloatVectors>;
defm "" : VPatTernaryW_VV_VX<"int_riscv_vfwnmsac", "PseudoVFWNMSAC", AllWidenableFloatVectors>;
+//===----------------------------------------------------------------------===//
+// 14.8. Vector Floating-Point Square-Root Instruction
+//===----------------------------------------------------------------------===//
+defm "" : VPatUnaryV_V<"int_riscv_vfsqrt", "PseudoVFSQRT", AllFloatVectors>;
+
//===----------------------------------------------------------------------===//
// 14.11. Vector Floating-Point Min/Max Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
new file mode 100644
index 000000000000..48c6dc9999ac
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv32.ll
@@ -0,0 +1,512 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv32 -mattr=+experimental-v,+f,+experimental-zfh -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
+ <vscale x 1 x half>,
+ i32);
+
+define <vscale x 1 x half> @intrinsic_vfsqrt_v_nxv1f16_nxv1f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x half> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
+ <vscale x 1 x half> %0,
+ i32 %1)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x half> @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
+ <vscale x 2 x half>,
+ i32);
+
+define <vscale x 2 x half> @intrinsic_vfsqrt_v_nxv2f16_nxv2f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x half> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
+ <vscale x 2 x half> %0,
+ i32 %1)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x half> @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
+ <vscale x 4 x half>,
+ i32);
+
+define <vscale x 4 x half> @intrinsic_vfsqrt_v_nxv4f16_nxv4f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x half> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
+ <vscale x 4 x half> %0,
+ i32 %1)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x half> @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
+ <vscale x 8 x half>,
+ i32);
+
+define <vscale x 8 x half> @intrinsic_vfsqrt_v_nxv8f16_nxv8f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x half> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
+ <vscale x 8 x half> %0,
+ i32 %1)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x half> @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
+ <vscale x 16 x half>,
+ i32);
+
+define <vscale x 16 x half> @intrinsic_vfsqrt_v_nxv16f16_nxv16f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 16 x half> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
+ <vscale x 16 x half> %0,
+ i32 %1)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
+ <vscale x 16 x half>,
+ <vscale x 16 x half>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x half> @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
+ <vscale x 32 x half>,
+ i32);
+
+define <vscale x 32 x half> @intrinsic_vfsqrt_v_nxv32f16_nxv32f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 32 x half> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
+ <vscale x 32 x half> %0,
+ i32 %1)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
+ <vscale x 32 x half>,
+ <vscale x 32 x half>,
+ <vscale x 32 x i1>,
+ i32);
+
+define <vscale x 32 x half> @intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ <vscale x 32 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ <vscale x 32 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
+ <vscale x 1 x float>,
+ i32);
+
+define <vscale x 1 x float> @intrinsic_vfsqrt_v_nxv1f32_nxv1f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x float> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
+ <vscale x 1 x float> %0,
+ i32 %1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ i32);
+
+define <vscale x 1 x float> @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
+ <vscale x 2 x float>,
+ i32);
+
+define <vscale x 2 x float> @intrinsic_vfsqrt_v_nxv2f32_nxv2f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x float> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
+ <vscale x 2 x float> %0,
+ i32 %1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ i32);
+
+define <vscale x 2 x float> @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
+ <vscale x 4 x float>,
+ i32);
+
+define <vscale x 4 x float> @intrinsic_vfsqrt_v_nxv4f32_nxv4f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x float> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
+ <vscale x 4 x float> %0,
+ i32 %1)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ i32);
+
+define <vscale x 4 x float> @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 4 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
+ <vscale x 8 x float>,
+ i32);
+
+define <vscale x 8 x float> @intrinsic_vfsqrt_v_nxv8f32_nxv8f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x float> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
+ <vscale x 8 x float> %0,
+ i32 %1)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ i32);
+
+define <vscale x 8 x float> @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 8 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
+ <vscale x 16 x float>,
+ i32);
+
+define <vscale x 16 x float> @intrinsic_vfsqrt_v_nxv16f32_nxv16f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 16 x float> %0,
+ i32 %1) nounwind {
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
+ <vscale x 16 x float> %0,
+ i32 %1)
+
+ ret <vscale x 16 x float> %a
+}
+
+declare <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ i32);
+
+define <vscale x 16 x float> @intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3) nounwind {
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ i32 %3)
+
+ ret <vscale x 16 x float> %a
+}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
new file mode 100644
index 000000000000..087069384b4a
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vfsqrt-rv64.ll
@@ -0,0 +1,698 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=riscv64 -mattr=+experimental-v,+d,+experimental-zfh -verify-machineinstrs \
+; RUN: --riscv-no-aliases < %s | FileCheck %s
+declare <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
+ <vscale x 1 x half>,
+ i64);
+
+define <vscale x 1 x half> @intrinsic_vfsqrt_v_nxv1f16_nxv1f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x half> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.nxv1f16(
+ <vscale x 1 x half> %0,
+ i64 %1)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
+ <vscale x 1 x half>,
+ <vscale x 1 x half>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x half> @intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f16_nxv1f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf4,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 1 x half> @llvm.riscv.vfsqrt.mask.nxv1f16(
+ <vscale x 1 x half> %0,
+ <vscale x 1 x half> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
+ <vscale x 2 x half>,
+ i64);
+
+define <vscale x 2 x half> @intrinsic_vfsqrt_v_nxv2f16_nxv2f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x half> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.nxv2f16(
+ <vscale x 2 x half> %0,
+ i64 %1)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
+ <vscale x 2 x half>,
+ <vscale x 2 x half>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x half> @intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f16_nxv2f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,mf2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 2 x half> @llvm.riscv.vfsqrt.mask.nxv2f16(
+ <vscale x 2 x half> %0,
+ <vscale x 2 x half> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
+ <vscale x 4 x half>,
+ i64);
+
+define <vscale x 4 x half> @intrinsic_vfsqrt_v_nxv4f16_nxv4f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x half> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.nxv4f16(
+ <vscale x 4 x half> %0,
+ i64 %1)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
+ <vscale x 4 x half>,
+ <vscale x 4 x half>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x half> @intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f16_nxv4f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m1,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 4 x half> @llvm.riscv.vfsqrt.mask.nxv4f16(
+ <vscale x 4 x half> %0,
+ <vscale x 4 x half> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
+ <vscale x 8 x half>,
+ i64);
+
+define <vscale x 8 x half> @intrinsic_vfsqrt_v_nxv8f16_nxv8f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x half> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.nxv8f16(
+ <vscale x 8 x half> %0,
+ i64 %1)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
+ <vscale x 8 x half>,
+ <vscale x 8 x half>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x half> @intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f16_nxv8f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 8 x half> @llvm.riscv.vfsqrt.mask.nxv8f16(
+ <vscale x 8 x half> %0,
+ <vscale x 8 x half> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
+ <vscale x 16 x half>,
+ i64);
+
+define <vscale x 16 x half> @intrinsic_vfsqrt_v_nxv16f16_nxv16f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 16 x half> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.nxv16f16(
+ <vscale x 16 x half> %0,
+ i64 %1)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
+ <vscale x 16 x half>,
+ <vscale x 16 x half>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x half> @intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f16_nxv16f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m4,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 16 x half> @llvm.riscv.vfsqrt.mask.nxv16f16(
+ <vscale x 16 x half> %0,
+ <vscale x 16 x half> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
+ <vscale x 32 x half>,
+ i64);
+
+define <vscale x 32 x half> @intrinsic_vfsqrt_v_nxv32f16_nxv32f16(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e16,m8,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 32 x half> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.nxv32f16(
+ <vscale x 32 x half> %0,
+ i64 %1)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
+ <vscale x 32 x half>,
+ <vscale x 32 x half>,
+ <vscale x 32 x i1>,
+ i64);
+
+define <vscale x 32 x half> @intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv32f16_nxv32f16:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e16,m8,ta,mu
+; CHECK-NEXT: vle16.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e16,m8,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ <vscale x 32 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 32 x half> @llvm.riscv.vfsqrt.mask.nxv32f16(
+ <vscale x 32 x half> %0,
+ <vscale x 32 x half> %1,
+ <vscale x 32 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 32 x half> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
+ <vscale x 1 x float>,
+ i64);
+
+define <vscale x 1 x float> @intrinsic_vfsqrt_v_nxv1f32_nxv1f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x float> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.nxv1f32(
+ <vscale x 1 x float> %0,
+ i64 %1)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
+ <vscale x 1 x float>,
+ <vscale x 1 x float>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x float> @intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f32_nxv1f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,mf2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 1 x float> @llvm.riscv.vfsqrt.mask.nxv1f32(
+ <vscale x 1 x float> %0,
+ <vscale x 1 x float> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
+ <vscale x 2 x float>,
+ i64);
+
+define <vscale x 2 x float> @intrinsic_vfsqrt_v_nxv2f32_nxv2f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x float> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.nxv2f32(
+ <vscale x 2 x float> %0,
+ i64 %1)
+
+ ret <vscale x 2 x float> %a
+}
+
+declare <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
+ <vscale x 2 x float>,
+ <vscale x 2 x float>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x float> @intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f32_nxv2f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m1,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 2 x float> @llvm.riscv.vfsqrt.mask.nxv2f32(
+ <vscale x 2 x float> %0,
+ <vscale x 2 x float> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x float> %a
+}
+
+; Unmasked vfsqrt on <vscale x 4 x float> with an i64 vl operand.
+; The wrapper passes (%0, %1) straight to the intrinsic and returns its result.
+; Expected assembly: a vsetvli with e32,m2,ta,mu, then vfsqrt.v v16, v16.
+declare <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
+ <vscale x 4 x float>,
+ i64);
+
+define <vscale x 4 x float> @intrinsic_vfsqrt_v_nxv4f32_nxv4f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x float> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.nxv4f32(
+ <vscale x 4 x float> %0,
+ i64 %1)
+
+ ret <vscale x 4 x float> %a
+}
+
+; Masked vfsqrt on <vscale x 4 x float>. Operands are two vectors of the result
+; type, a <vscale x 4 x i1> mask and an i64 vl, all forwarded unchanged to the
+; intrinsic.
+; Expected assembly: a vsetvli with e32,m2,tu,mu, then vfsqrt.v v16, v18, v0.t.
+; NOTE(review): presumably %0 is the merge value (v16) and %1 the source (v18);
+; confirm against the intrinsic definition.
+declare <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
+ <vscale x 4 x float>,
+ <vscale x 4 x float>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x float> @intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f32_nxv4f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 4 x float> @llvm.riscv.vfsqrt.mask.nxv4f32(
+ <vscale x 4 x float> %0,
+ <vscale x 4 x float> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x float> %a
+}
+
+; Unmasked vfsqrt on <vscale x 8 x float> with an i64 vl operand.
+; The wrapper passes (%0, %1) straight to the intrinsic and returns its result.
+; Expected assembly: a vsetvli with e32,m4,ta,mu, then vfsqrt.v v16, v16.
+declare <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
+ <vscale x 8 x float>,
+ i64);
+
+define <vscale x 8 x float> @intrinsic_vfsqrt_v_nxv8f32_nxv8f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x float> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.nxv8f32(
+ <vscale x 8 x float> %0,
+ i64 %1)
+
+ ret <vscale x 8 x float> %a
+}
+
+; Masked vfsqrt on <vscale x 8 x float>. Operands are two vectors of the result
+; type, a <vscale x 8 x i1> mask and an i64 vl, all forwarded unchanged to the
+; intrinsic.
+; Expected assembly: a vsetvli with e32,m4,tu,mu, then vfsqrt.v v16, v20, v0.t.
+; NOTE(review): presumably %0 is the merge value (v16) and %1 the source (v20);
+; confirm against the intrinsic definition.
+declare <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
+ <vscale x 8 x float>,
+ <vscale x 8 x float>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x float> @intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f32_nxv8f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m4,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 8 x float> @llvm.riscv.vfsqrt.mask.nxv8f32(
+ <vscale x 8 x float> %0,
+ <vscale x 8 x float> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x float> %a
+}
+
+; Unmasked vfsqrt on <vscale x 16 x float> with an i64 vl operand.
+; The wrapper passes (%0, %1) straight to the intrinsic and returns its result.
+; Expected assembly: a vsetvli with e32,m8,ta,mu, then vfsqrt.v v16, v16.
+declare <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
+ <vscale x 16 x float>,
+ i64);
+
+define <vscale x 16 x float> @intrinsic_vfsqrt_v_nxv16f32_nxv16f32(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e32,m8,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 16 x float> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.nxv16f32(
+ <vscale x 16 x float> %0,
+ i64 %1)
+
+ ret <vscale x 16 x float> %a
+}
+
+; Masked vfsqrt on <vscale x 16 x float>. Operands are two vectors of the result
+; type, a <vscale x 16 x i1> mask and an i64 vl, all forwarded unchanged to the
+; intrinsic.
+; Unlike the smaller-LMUL masked cases, the expected assembly first loads one
+; vector into v8 from the address in a0 (vle32.v under a vsetvli with the zero
+; register as requested length), then sets vl from a1 with e32,m8,tu,mu and
+; issues vfsqrt.v v16, v8, v0.t.
+; NOTE(review): presumably the second m8 vector argument is passed indirectly
+; because it does not fit in the argument registers; confirm against the
+; calling convention.
+declare <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
+ <vscale x 16 x float>,
+ <vscale x 16 x float>,
+ <vscale x 16 x i1>,
+ i64);
+
+define <vscale x 16 x float> @intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv16f32_nxv16f32:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e32,m8,ta,mu
+; CHECK-NEXT: vle32.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e32,m8,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 16 x float> @llvm.riscv.vfsqrt.mask.nxv16f32(
+ <vscale x 16 x float> %0,
+ <vscale x 16 x float> %1,
+ <vscale x 16 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 16 x float> %a
+}
+
+; Unmasked vfsqrt on <vscale x 1 x double> with an i64 vl operand.
+; The wrapper passes (%0, %1) straight to the intrinsic and returns its result.
+; Expected assembly: a vsetvli with e64,m1,ta,mu, then vfsqrt.v v16, v16.
+declare <vscale x 1 x double> @llvm.riscv.vfsqrt.nxv1f64(
+ <vscale x 1 x double>,
+ i64);
+
+define <vscale x 1 x double> @intrinsic_vfsqrt_v_nxv1f64_nxv1f64(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x double> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfsqrt.nxv1f64(
+ <vscale x 1 x double> %0,
+ i64 %1)
+
+ ret <vscale x 1 x double> %a
+}
+
+; Masked vfsqrt on <vscale x 1 x double>. Operands are two vectors of the result
+; type, a <vscale x 1 x i1> mask and an i64 vl, all forwarded unchanged to the
+; intrinsic.
+; Expected assembly: a vsetvli with e64,m1,tu,mu, then vfsqrt.v v16, v17, v0.t.
+; NOTE(review): presumably %0 is the merge value (v16) and %1 the source (v17);
+; confirm against the intrinsic definition.
+declare <vscale x 1 x double> @llvm.riscv.vfsqrt.mask.nxv1f64(
+ <vscale x 1 x double>,
+ <vscale x 1 x double>,
+ <vscale x 1 x i1>,
+ i64);
+
+define <vscale x 1 x double> @intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv1f64_nxv1f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m1,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v17, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 1 x double> %0,
+ <vscale x 1 x double> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 1 x double> @llvm.riscv.vfsqrt.mask.nxv1f64(
+ <vscale x 1 x double> %0,
+ <vscale x 1 x double> %1,
+ <vscale x 1 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 1 x double> %a
+}
+
+; Unmasked vfsqrt on <vscale x 2 x double> with an i64 vl operand.
+; The wrapper passes (%0, %1) straight to the intrinsic and returns its result.
+; Expected assembly: a vsetvli with e64,m2,ta,mu, then vfsqrt.v v16, v16.
+declare <vscale x 2 x double> @llvm.riscv.vfsqrt.nxv2f64(
+ <vscale x 2 x double>,
+ i64);
+
+define <vscale x 2 x double> @intrinsic_vfsqrt_v_nxv2f64_nxv2f64(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x double> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfsqrt.nxv2f64(
+ <vscale x 2 x double> %0,
+ i64 %1)
+
+ ret <vscale x 2 x double> %a
+}
+
+; Masked vfsqrt on <vscale x 2 x double>. Operands are two vectors of the result
+; type, a <vscale x 2 x i1> mask and an i64 vl, all forwarded unchanged to the
+; intrinsic.
+; Expected assembly: a vsetvli with e64,m2,tu,mu, then vfsqrt.v v16, v18, v0.t.
+; NOTE(review): presumably %0 is the merge value (v16) and %1 the source (v18);
+; confirm against the intrinsic definition.
+declare <vscale x 2 x double> @llvm.riscv.vfsqrt.mask.nxv2f64(
+ <vscale x 2 x double>,
+ <vscale x 2 x double>,
+ <vscale x 2 x i1>,
+ i64);
+
+define <vscale x 2 x double> @intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv2f64_nxv2f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m2,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v18, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 2 x double> %0,
+ <vscale x 2 x double> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 2 x double> @llvm.riscv.vfsqrt.mask.nxv2f64(
+ <vscale x 2 x double> %0,
+ <vscale x 2 x double> %1,
+ <vscale x 2 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 2 x double> %a
+}
+
+; Unmasked vfsqrt on <vscale x 4 x double> with an i64 vl operand.
+; The wrapper passes (%0, %1) straight to the intrinsic and returns its result.
+; Expected assembly: a vsetvli with e64,m4,ta,mu, then vfsqrt.v v16, v16.
+declare <vscale x 4 x double> @llvm.riscv.vfsqrt.nxv4f64(
+ <vscale x 4 x double>,
+ i64);
+
+define <vscale x 4 x double> @intrinsic_vfsqrt_v_nxv4f64_nxv4f64(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x double> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfsqrt.nxv4f64(
+ <vscale x 4 x double> %0,
+ i64 %1)
+
+ ret <vscale x 4 x double> %a
+}
+
+; Masked vfsqrt on <vscale x 4 x double>. Operands are two vectors of the result
+; type, a <vscale x 4 x i1> mask and an i64 vl, all forwarded unchanged to the
+; intrinsic.
+; Expected assembly: a vsetvli with e64,m4,tu,mu, then vfsqrt.v v16, v20, v0.t.
+; NOTE(review): presumably %0 is the merge value (v16) and %1 the source (v20);
+; confirm against the intrinsic definition.
+declare <vscale x 4 x double> @llvm.riscv.vfsqrt.mask.nxv4f64(
+ <vscale x 4 x double>,
+ <vscale x 4 x double>,
+ <vscale x 4 x i1>,
+ i64);
+
+define <vscale x 4 x double> @intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv4f64_nxv4f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m4,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v20, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 4 x double> %0,
+ <vscale x 4 x double> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 4 x double> @llvm.riscv.vfsqrt.mask.nxv4f64(
+ <vscale x 4 x double> %0,
+ <vscale x 4 x double> %1,
+ <vscale x 4 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 4 x double> %a
+}
+
+; Unmasked vfsqrt on <vscale x 8 x double> with an i64 vl operand.
+; The wrapper passes (%0, %1) straight to the intrinsic and returns its result.
+; Expected assembly: a vsetvli with e64,m8,ta,mu, then vfsqrt.v v16, v16.
+declare <vscale x 8 x double> @llvm.riscv.vfsqrt.nxv8f64(
+ <vscale x 8 x double>,
+ i64);
+
+define <vscale x 8 x double> @intrinsic_vfsqrt_v_nxv8f64_nxv8f64(
+; CHECK-LABEL: intrinsic_vfsqrt_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a0, a0, e64,m8,ta,mu
+; CHECK-NEXT: vfsqrt.v v16, v16
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x double> %0,
+ i64 %1) nounwind {
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfsqrt.nxv8f64(
+ <vscale x 8 x double> %0,
+ i64 %1)
+
+ ret <vscale x 8 x double> %a
+}
+
+; Masked vfsqrt on <vscale x 8 x double>. Operands are two vectors of the result
+; type, a <vscale x 8 x i1> mask and an i64 vl, all forwarded unchanged to the
+; intrinsic.
+; Unlike the smaller-LMUL masked cases, the expected assembly first loads one
+; vector into v8 from the address in a0 (vle64.v under a vsetvli with the zero
+; register as requested length), then sets vl from a1 with e64,m8,tu,mu and
+; issues vfsqrt.v v16, v8, v0.t.
+; NOTE(review): presumably the second m8 vector argument is passed indirectly
+; because it does not fit in the argument registers; confirm against the
+; calling convention.
+declare <vscale x 8 x double> @llvm.riscv.vfsqrt.mask.nxv8f64(
+ <vscale x 8 x double>,
+ <vscale x 8 x double>,
+ <vscale x 8 x i1>,
+ i64);
+
+define <vscale x 8 x double> @intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64(
+; CHECK-LABEL: intrinsic_vfsqrt_mask_v_nxv8f64_nxv8f64:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: vsetvli a2, zero, e64,m8,ta,mu
+; CHECK-NEXT: vle64.v v8, (a0)
+; CHECK-NEXT: vsetvli a0, a1, e64,m8,tu,mu
+; CHECK-NEXT: vfsqrt.v v16, v8, v0.t
+; CHECK-NEXT: jalr zero, 0(ra)
+ <vscale x 8 x double> %0,
+ <vscale x 8 x double> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3) nounwind {
+entry:
+ %a = call <vscale x 8 x double> @llvm.riscv.vfsqrt.mask.nxv8f64(
+ <vscale x 8 x double> %0,
+ <vscale x 8 x double> %1,
+ <vscale x 8 x i1> %2,
+ i64 %3)
+
+ ret <vscale x 8 x double> %a
+}
More information about the llvm-commits
mailing list