[clang] [llvm] [TargetLibraryInfo] Add libmvec support for risc-v (PR #119844)
Zhijin Zeng via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 13 01:28:44 PST 2024
https://github.com/zengdage created https://github.com/llvm/llvm-project/pull/119844
1. Rename LIBMVEC_X86 into LIBMVEC to support libmvec (glibc vector match library) in risc-v.
2. Add RVVM1/2/4/8 in VFISAKind to distingusih the LMUL value, so we can take full advantage of risc-v vector extension.
3. Declare some RVV vector math functions in VecFuncs.def.
In VecFuncs.def, I add the LI_DEFINE_VECFUNC of LIBMVEC_RVV as follow:
```
TLI_DEFINE_VECFUNC("sin", "_ZGV1Nxv_sin", SCALABLE(1), "_ZGVr1Nxv")
TLI_DEFINE_VECFUNC("sin", "_ZGV2Nxv_sin", SCALABLE(2), "_ZGVr2Nxv")
TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGV1Nxv_expf", SCALABLE(2), "_ZGVr1Nxv")
TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGV2Nxv_expf", SCALABLE(4), "_ZGVr2Nxv")
```
The `VEC` of TLI_DEFINE_VECFUNC (e.g., `_ZGV2Nxv_sin`), its name mangling rules defined in https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/455 . Now it's still under review.
The` VF` (e.g., ` SCALABLE(2)`) should be `vscale x (LMUL * 64 / sizeof(Type)`. For example, the type of the parameter of sin is double, so for `_ZGV2Nxv_sin`, its VF is `vscale x 2`.
In the `VABI_PREFIX` (e.g., `_ZGVr1Nxv`), `r` means RISC-V vector extension, `1` is LMUL value.
```
_ZGVr1Nxv --> RISC-V Vector Extension with LMUL=1
_ZGVr2Nxv --> RISC-V Vector Extension with LMUL=2
_ZGVr4Nxv --> RISC-V Vector Extension with LMUL=4
_ZGVr8Nxv --> RISC-V Vector Extension with LMUL=8
```
>From 101d7cb3749f0e3d892a7bf0bf797e40a334882f Mon Sep 17 00:00:00 2001
From: Zhijin Zeng <zhijin.zeng at spacemit.com>
Date: Thu, 12 Dec 2024 11:42:32 +0800
Subject: [PATCH 1/2] [NFC] Add vector call tests if use vector-library in
risc-v
---
.../LoopVectorize/RISCV/libm-vector-calls.ll | 415 ++++++++++++++++++
1 file changed, 415 insertions(+)
create mode 100644 llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll
new file mode 100644
index 00000000000000..75fdd00e25f988
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll
@@ -0,0 +1,415 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
+; RUN: opt -mtriple=riscv64 -mattr=+v -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
+target triple = "riscv64-unknown-linux-gnu"
+
+define void @sin_f64(ptr nocapture %varray) {
+; CHECK-LABEL: define void @sin_f64(
+; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double>
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double>
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4
+; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @sin(double [[CONV]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]]
+; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @sin(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !1
+
+for.end:
+ ret void
+}
+
+!1 = distinct !{!1, !2}
+!2 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+
+define void @sin_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: define void @sin_f64_intrinsic(
+; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double>
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double>
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4
+; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]]
+; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.sin.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !31
+
+for.end:
+ ret void
+}
+
+!31 = distinct !{!31, !32}
+!32 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @cos_f64(ptr nocapture %varray) {
+; CHECK-LABEL: define void @cos_f64(
+; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double>
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double>
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4
+; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @cos(double [[CONV]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]]
+; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @cos(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !51
+
+for.end:
+ ret void
+}
+
+!51 = distinct !{!51, !52}
+!52 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @cos_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: define void @cos_f64_intrinsic(
+; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double>
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double>
+; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4
+; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4
+; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
+; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
+; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]]
+; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.cos.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !71
+
+for.end:
+ ret void
+}
+
+!71 = distinct !{!71, !72}
+!72 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @exp_f32(ptr nocapture %varray) {
+; CHECK-LABEL: define void @exp_f32(
+; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[VEC_IND]] to <8 x float>
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[STEP_ADD]] to <8 x float>
+; CHECK-NEXT: [[TMP3:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP2]])
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 8
+; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[TMP15]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP4]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[STEP_ADD]], splat (i32 8)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 992, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float
+; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @expf(float [[CONV]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %tmp = trunc i64 %indvars.iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call fast float @expf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+ store float %call, ptr %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !91
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!91 = distinct !{!91, !92}
+!92 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+define void @exp_f32_intrin(ptr nocapture %varray) {
+; CHECK-LABEL: define void @exp_f32_intrin(
+; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[ENTRY:.*]]:
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
+; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[VEC_IND]] to <8 x float>
+; CHECK-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[STEP_ADD]] to <8 x float>
+; CHECK-NEXT: [[TMP3:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP2]])
+; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[TMP11]]
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
+; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 8
+; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[TMP15]], align 4
+; CHECK-NEXT: store <8 x float> [[TMP4]], ptr [[TMP7]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[STEP_ADD]], splat (i32 8)
+; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
+; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 992, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: br label %[[FOR_BODY:.*]]
+; CHECK: [[FOR_BODY]]:
+; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
+; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32
+; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float
+; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @llvm.exp.f32(float [[CONV]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[INDVARS_IV]]
+; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX]], align 4
+; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
+; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1000
+; CHECK-NEXT: br i1 [[EXITCOND]], label %[[FOR_END]], label %[[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK: [[FOR_END]]:
+; CHECK-NEXT: ret void
+;
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %tmp = trunc i64 %indvars.iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call fast float @llvm.exp.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %indvars.iv
+ store float %call, ptr %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !101
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!101 = distinct !{!101, !102}
+!102 = !{!"llvm.loop.vectorize.enable", i1 true}
+
+attributes #0 = { nounwind readnone }
+
+declare double @sin(double) #0
+declare double @llvm.sin.f64(double) #0
+declare double @cos(double) #0
+declare double @llvm.cos.f64(double) #0
+declare float @expf(float) #0
+declare float @llvm.exp.f32(float) #0
+;.
+; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
+; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
+; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
+; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
+; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
+; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
+; CHECK: [[LOOP6]] = distinct !{[[LOOP6]], [[META1]], [[META2]]}
+; CHECK: [[LOOP7]] = distinct !{[[LOOP7]], [[META2]], [[META1]]}
+; CHECK: [[LOOP8]] = distinct !{[[LOOP8]], [[META1]], [[META2]]}
+; CHECK: [[LOOP9]] = distinct !{[[LOOP9]], [[META2]], [[META1]]}
+; CHECK: [[LOOP10]] = distinct !{[[LOOP10]], [[META1]], [[META2]]}
+; CHECK: [[LOOP11]] = distinct !{[[LOOP11]], [[META2]], [[META1]]}
+; CHECK: [[LOOP12]] = distinct !{[[LOOP12]], [[META1]], [[META2]]}
+; CHECK: [[LOOP13]] = distinct !{[[LOOP13]], [[META2]], [[META1]]}
+;.
>From 34679d676e352784110fb12e386bb22eb7019312 Mon Sep 17 00:00:00 2001
From: Zhijin Zeng <zhijin.zeng at spacemit.com>
Date: Fri, 13 Dec 2024 10:56:28 +0800
Subject: [PATCH 2/2] [TargetLibraryInfo] Add libmvec support for risc-v
1. Rename LIBMVEC_X86 into LIBMVEC to support libmvec in risc-v.
2. Add RVVM1/2/4/8 in VFISAKind to distingusih the LMUL value.
3. Declare some RVV vector math functions in VecFuncs.def.
In VecFuncs.def, I add the LI_DEFINE_VECFUNC of LIBMVEC_RVV as follow:
'''
TLI_DEFINE_VECFUNC("sin", "_ZGV1Nxv_sin", SCALABLE(1), "_ZGVr1Nxv")
TLI_DEFINE_VECFUNC("sin", "_ZGV2Nxv_sin", SCALABLE(2), "_ZGVr2Nxv")
TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGV1Nxv_expf", SCALABLE(2), "_ZGVr1Nxv")
TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGV2Nxv_expf", SCALABLE(4), "_ZGVr2Nxv")
'''
The 'VEC' of TLI_DEFINE_VECFUNC is like '_ZGV2Nxv_sin', its name mangling rules
defined in
https://github.com/riscv-non-isa/riscv-elf-psabi-doc/pull/455
The 'VF' of TLI_DEFINE_VECFUNC is like 'SCALABLE(2)', it should be
'vscale x (LMUL * 64 / sizeof(Type)'.
The 'VABI_PREFIX' of TLI_DEFINE_VECFUNC is like '_ZGVr1Nxv', 'r' means RISC-V
vector extension, '1' is the LMUL value.
'''
_ZGVr1Nxv --> RISC-V Vector Extension with LMUL=1
_ZGVr2Nxv --> RISC-V Vector Extension with LMUL=2
_ZGVr4Nxv --> RISC-V Vector Extension with LMUL=4
_ZGVr8Nxv --> RISC-V Vector Extension with LMUL=8
'''
---
clang/lib/Driver/ToolChains/Clang.cpp | 6 +-
clang/lib/Driver/ToolChains/CommonArgs.cpp | 2 +-
clang/lib/Driver/ToolChains/Flang.cpp | 6 +-
clang/test/Driver/fveclib.c | 10 +-
.../include/llvm/Analysis/TargetLibraryInfo.h | 2 +-
llvm/include/llvm/Analysis/VecFuncs.def | 73 +++++
llvm/include/llvm/IR/VFABIDemangler.h | 4 +
llvm/lib/Analysis/TargetLibraryInfo.cpp | 23 +-
llvm/lib/Frontend/Driver/CodeGenOptions.cpp | 2 +-
llvm/lib/IR/VFABIDemangler.cpp | 69 ++++-
.../Transforms/Utils/InjectTLIMappings.cpp | 4 +-
.../Generic/replace-intrinsics-with-veclib.ll | 18 +-
.../LoopVectorize/RISCV/libm-vector-calls.ll | 284 +++++++++++-------
.../X86/libm-vector-calls-VF2-VF8.ll | 2 +-
.../X86/libm-vector-calls-finite.ll | 2 +-
.../LoopVectorize/X86/libm-vector-calls.ll | 2 +-
llvm/test/Transforms/Util/add-TLI-mappings.ll | 24 +-
17 files changed, 379 insertions(+), 154 deletions(-)
diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp
index a020e00cd17392..1fdc1c66e9750e 100644
--- a/clang/lib/Driver/ToolChains/Clang.cpp
+++ b/clang/lib/Driver/ToolChains/Clang.cpp
@@ -5805,9 +5805,11 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA,
Triple.getArch() != llvm::Triple::x86_64)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();
- } else if (Name == "LIBMVEC-X86") {
+ } else if (Name == "LIBMVEC") {
if (Triple.getArch() != llvm::Triple::x86 &&
- Triple.getArch() != llvm::Triple::x86_64)
+ Triple.getArch() != llvm::Triple::x86_64 &&
+ Triple.getArch() != llvm::Triple::riscv32 &&
+ Triple.getArch() != llvm::Triple::riscv64)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();
} else if (Name == "SLEEF" || Name == "ArmPL") {
diff --git a/clang/lib/Driver/ToolChains/CommonArgs.cpp b/clang/lib/Driver/ToolChains/CommonArgs.cpp
index 0d851314a89539..2a4c298cfdeafb 100644
--- a/clang/lib/Driver/ToolChains/CommonArgs.cpp
+++ b/clang/lib/Driver/ToolChains/CommonArgs.cpp
@@ -909,7 +909,7 @@ void tools::addLTOOptions(const ToolChain &ToolChain, const ArgList &Args,
std::optional<StringRef> OptVal =
llvm::StringSwitch<std::optional<StringRef>>(ArgVecLib->getValue())
.Case("Accelerate", "Accelerate")
- .Case("LIBMVEC", "LIBMVEC-X86")
+ .Case("LIBMVEC", "LIBMVEC")
.Case("MASSV", "MASSV")
.Case("SVML", "SVML")
.Case("SLEEF", "sleefgnuabi")
diff --git a/clang/lib/Driver/ToolChains/Flang.cpp b/clang/lib/Driver/ToolChains/Flang.cpp
index c98fdbd157bac8..5d630d6e7777df 100644
--- a/clang/lib/Driver/ToolChains/Flang.cpp
+++ b/clang/lib/Driver/ToolChains/Flang.cpp
@@ -446,9 +446,11 @@ void Flang::addTargetOptions(const ArgList &Args,
Triple.getArch() != llvm::Triple::x86_64)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();
- } else if (Name == "LIBMVEC-X86") {
+ } else if (Name == "LIBMVEC") {
if (Triple.getArch() != llvm::Triple::x86 &&
- Triple.getArch() != llvm::Triple::x86_64)
+ Triple.getArch() != llvm::Triple::x86_64 &&
+ Triple.getArch() != llvm::Triple::riscv32 &&
+ Triple.getArch() != llvm::Triple::riscv64)
D.Diag(diag::err_drv_unsupported_opt_for_target)
<< Name << Triple.getArchName();
} else if (Name == "SLEEF" || Name == "ArmPL") {
diff --git a/clang/test/Driver/fveclib.c b/clang/test/Driver/fveclib.c
index 7d0985c4dd4f48..0b127cb540e176 100644
--- a/clang/test/Driver/fveclib.c
+++ b/clang/test/Driver/fveclib.c
@@ -5,6 +5,7 @@
// RUN: %clang -### -c -fveclib=Darwin_libsystem_m %s 2>&1 | FileCheck --check-prefix=CHECK-DARWIN_LIBSYSTEM_M %s
// RUN: %clang -### -c --target=aarch64 -fveclib=SLEEF %s 2>&1 | FileCheck --check-prefix=CHECK-SLEEF %s
// RUN: %clang -### -c --target=riscv64-unknown-linux-gnu -fveclib=SLEEF -march=rv64gcv %s 2>&1 | FileCheck -check-prefix CHECK-SLEEF-RISCV %s
+// RUN: %clang -### -c --target=riscv64-unknown-linux-gnu -fveclib=libmvec -march=rv64gcv %s 2>&1 | FileCheck -check-prefix CHECK-libmvec %s
// RUN: %clang -### -c --target=aarch64 -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-ARMPL %s
// RUN: not %clang -c -fveclib=something %s 2>&1 | FileCheck --check-prefix=CHECK-INVALID %s
@@ -21,7 +22,7 @@
// RUN: not %clang --target=x86 -c -fveclib=SLEEF %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
// RUN: not %clang --target=x86 -c -fveclib=ArmPL %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
-// RUN: not %clang --target=aarch64 -c -fveclib=LIBMVEC-X86 %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
+// RUN: not %clang --target=aarch64 -c -fveclib=LIBMVEC %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
// RUN: not %clang --target=aarch64 -c -fveclib=SVML %s 2>&1 | FileCheck --check-prefix=CHECK-ERROR %s
// CHECK-ERROR: unsupported option {{.*}} for target
@@ -38,7 +39,7 @@
/* Verify that the correct vector library is passed to LTO flags. */
// RUN: %clang -### --target=x86_64-unknown-linux-gnu -fveclib=LIBMVEC -flto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-LIBMVEC %s
-// CHECK-LTO-LIBMVEC: "-plugin-opt=-vector-library=LIBMVEC-X86"
+// CHECK-LTO-LIBMVEC: "-plugin-opt=-vector-library=LIBMVEC"
// RUN: %clang -### --target=powerpc64-unknown-linux-gnu -fveclib=MASSV -flto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-MASSV %s
// CHECK-LTO-MASSV: "-plugin-opt=-vector-library=MASSV"
@@ -52,6 +53,9 @@
// RUN: %clang -### --target=riscv64-unknown-linux-gnu -fveclib=SLEEF -flto -march=rv64gcv %s 2>&1 | FileCheck -check-prefix CHECK-LTO-SLEEF-RISCV %s
// CHECK-LTO-SLEEF-RISCV: "-plugin-opt=-vector-library=sleefgnuabi"
+// RUN: %clang -### --target=riscv64-unknown-linux-gnu -fveclib=LIBMVEC -flto -march=rv64gcv %s 2>&1 | FileCheck -check-prefix CHECK-LTO-LIBMVEC-RISCV %s
+// CHECK-LTO-LIBMVEC-RISCV: "-plugin-opt=-vector-library=LIBMVEC"
+
// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL -flto %s 2>&1 | FileCheck --check-prefix=CHECK-LTO-ARMPL %s
// CHECK-LTO-ARMPL: "-plugin-opt=-vector-library=ArmPL"
@@ -110,7 +114,7 @@
// CHECK-ENABLED-LAST: math errno enabled by '-ffp-model=strict' after it was implicitly disabled by '-fveclib=ArmPL', this may limit the utilization of the vector library [-Wmath-errno-enabled-with-veclib]
/* Verify no warning when math-errno is re-enabled for a different veclib (that does not imply -fno-math-errno). */
-// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL -fmath-errno -fveclib=LIBMVEC %s 2>&1 | FileCheck --check-prefix=CHECK-REPEAT-VECLIB %s
+// RUN: %clang -### --target=aarch64-linux-gnu -fveclib=ArmPL -fmath-errno -fveclib=LIBMVEC-X86 %s 2>&1 | FileCheck --check-prefix=CHECK-REPEAT-VECLIB %s
// CHECK-REPEAT-VECLIB-NOT: math errno enabled
/// Verify that vectorized routines library is being linked in.
diff --git a/llvm/include/llvm/Analysis/TargetLibraryInfo.h b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
index f51d2bb9d50a21..b6bfcb865e20db 100644
--- a/llvm/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/llvm/include/llvm/Analysis/TargetLibraryInfo.h
@@ -124,7 +124,7 @@ class TargetLibraryInfoImpl {
NoLibrary, // Don't use any vector library.
Accelerate, // Use Accelerate framework.
DarwinLibSystemM, // Use Darwin's libsystem_m.
- LIBMVEC_X86, // GLIBC Vector Math library.
+ LIBMVEC, // GLIBC Vector Math library.
MASSV, // IBM MASS vector library.
SVML, // Intel short vector math library.
SLEEFGNUABI, // SLEEF - SIMD Library for Evaluating Elementary Functions.
diff --git a/llvm/include/llvm/Analysis/VecFuncs.def b/llvm/include/llvm/Analysis/VecFuncs.def
index 251331b9f860c8..5e5a37dc39f687 100644
--- a/llvm/include/llvm/Analysis/VecFuncs.def
+++ b/llvm/include/llvm/Analysis/VecFuncs.def
@@ -236,6 +236,79 @@ TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGVdN4v_log", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVbN4v_logf", FIXED(4), "_ZGV_LLVM_N4v")
TLI_DEFINE_VECFUNC("llvm.log.f32", "_ZGVdN8v_logf", FIXED(8), "_ZGV_LLVM_N8v")
+#elif defined(TLI_DEFINE_LIBMVEC_RVV_VECFUNCS)
+// GLIBC Vector math Functions for RISC-V
+
+TLI_DEFINE_VECFUNC("sin", "_ZGV1Nxv_sin", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("sin", "_ZGV2Nxv_sin", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("sin", "_ZGV4Nxv_sin", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("sin", "_ZGV8Nxv_sin", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGV1Nxv_sin", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGV2Nxv_sin", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGV4Nxv_sin", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "_ZGV8Nxv_sin", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("cos", "_ZGV1Nxv_cos", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("cos", "_ZGV2Nxv_cos", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("cos", "_ZGV4Nxv_cos", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("cos", "_ZGV8Nxv_cos", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGV1Nxv_cos", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGV2Nxv_cos", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGV4Nxv_cos", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "_ZGV8Nxv_cos", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("tan", "_ZGV1Nxv_tan", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("tan", "_ZGV2Nxv_tan", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("tan", "_ZGV4Nxv_tan", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("tan", "_ZGV8Nxv_tan", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGV1Nxv_tan", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGV2Nxv_tan", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGV4Nxv_tan", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("llvm.tan.f64", "_ZGV8Nxv_tan", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("pow", "_ZGV1Nxvv_pow", SCALABLE(1), "_ZGVr1Nxvv")
+TLI_DEFINE_VECFUNC("pow", "_ZGV2Nxvv_pow", SCALABLE(2), "_ZGVr2Nxvv")
+TLI_DEFINE_VECFUNC("pow", "_ZGV4Nxvv_pow", SCALABLE(4), "_ZGVr4Nxvv")
+TLI_DEFINE_VECFUNC("pow", "_ZGV8Nxvv_pow", SCALABLE(8), "_ZGVr8Nxvv")
+
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGV1Nxvv_pow", SCALABLE(1), "_ZGVr1Nxvv")
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGV2Nxvv_pow", SCALABLE(2), "_ZGVr2Nxvv")
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGV4Nxvv_pow", SCALABLE(4), "_ZGVr4Nxvv")
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "_ZGV8Nxvv_pow", SCALABLE(8), "_ZGVr8Nxvv")
+
+TLI_DEFINE_VECFUNC("exp", "_ZGV1Nxv_exp", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("exp", "_ZGV2Nxv_exp", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("exp", "_ZGV4Nxv_exp", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("exp", "_ZGV8Nxv_exp", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("expf", "_ZGV1Nxv_expf", SCALABLE(2), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("expf", "_ZGV2Nxv_expf", SCALABLE(4), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("expf", "_ZGV4Nxv_expf", SCALABLE(8), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("expf", "_ZGV8Nxv_expf", SCALABLE(16), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGV1Nxv_exp", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGV2Nxv_exp", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGV4Nxv_exp", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "_ZGV8Nxv_exp", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGV1Nxv_expf", SCALABLE(2), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGV2Nxv_expf", SCALABLE(4), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGV4Nxv_expf", SCALABLE(8), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "_ZGV8Nxv_expf", SCALABLE(16), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("log", "_ZGV1Nxv_log", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("log", "_ZGV2Nxv_log", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("log", "_ZGV4Nxv_log", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("log", "_ZGV8Nxv_log", SCALABLE(8), "_ZGVr8Nxv")
+
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGV1Nxv_log", SCALABLE(1), "_ZGVr1Nxv")
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGV2Nxv_log", SCALABLE(2), "_ZGVr2Nxv")
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGV4Nxv_log", SCALABLE(4), "_ZGVr4Nxv")
+TLI_DEFINE_VECFUNC("llvm.log.f64", "_ZGV8Nxv_log", SCALABLE(8), "_ZGVr8Nxv")
+
#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
// IBM MASS library's vector Functions
diff --git a/llvm/include/llvm/IR/VFABIDemangler.h b/llvm/include/llvm/IR/VFABIDemangler.h
index de731cd7051c1b..283d42375fd4aa 100644
--- a/llvm/include/llvm/IR/VFABIDemangler.h
+++ b/llvm/include/llvm/IR/VFABIDemangler.h
@@ -49,6 +49,10 @@ enum class VFISAKind {
AVX, // x86 AVX
AVX2, // x86 AVX2
AVX512, // x86 AVX512
+ RVVM1, // RISC-V Vector Extension LMUL=1
+ RVVM2, // RISC-V Vector Extension LMUL=2
+ RVVM4, // RISC-V Vector Extension LMUL=4
+ RVVM8, // RISC-V Vector Extension LMUL=8
LLVM, // LLVM internal ISA for functions that are not
// attached to an existing ABI via name mangling.
Unknown // Unknown ISA
diff --git a/llvm/lib/Analysis/TargetLibraryInfo.cpp b/llvm/lib/Analysis/TargetLibraryInfo.cpp
index 8557901192e406..0135c7a9b9ce7d 100644
--- a/llvm/lib/Analysis/TargetLibraryInfo.cpp
+++ b/llvm/lib/Analysis/TargetLibraryInfo.cpp
@@ -29,7 +29,7 @@ static cl::opt<TargetLibraryInfoImpl::VectorLibrary> ClVectorLibrary(
"Accelerate framework"),
clEnumValN(TargetLibraryInfoImpl::DarwinLibSystemM,
"Darwin_libsystem_m", "Darwin libsystem_m"),
- clEnumValN(TargetLibraryInfoImpl::LIBMVEC_X86, "LIBMVEC-X86",
+ clEnumValN(TargetLibraryInfoImpl::LIBMVEC, "LIBMVEC",
"GLIBC Vector Math library"),
clEnumValN(TargetLibraryInfoImpl::MASSV, "MASSV",
"IBM MASS vector library"),
@@ -1291,6 +1291,12 @@ static const VecDesc VecFuncs_LIBMVEC_X86[] = {
#undef TLI_DEFINE_LIBMVEC_X86_VECFUNCS
};
+static const VecDesc VecFuncs_LIBMVEC_RVV[] = {
+#define TLI_DEFINE_LIBMVEC_RVV_VECFUNCS
+#include "llvm/Analysis/VecFuncs.def"
+#undef TLI_DEFINE_LIBMVEC_RVV_VECFUNCS
+};
+
static const VecDesc VecFuncs_MASSV[] = {
#define TLI_DEFINE_MASSV_VECFUNCS
#include "llvm/Analysis/VecFuncs.def"
@@ -1360,8 +1366,19 @@ void TargetLibraryInfoImpl::addVectorizableFunctionsFromVecLib(
addVectorizableFunctions(VecFuncs_DarwinLibSystemM);
break;
}
- case LIBMVEC_X86: {
- addVectorizableFunctions(VecFuncs_LIBMVEC_X86);
+ case LIBMVEC: {
+ switch (TargetTriple.getArch()) {
+ default:
+ break;
+ case llvm::Triple::x86:
+ case llvm::Triple::x86_64:
+ addVectorizableFunctions(VecFuncs_LIBMVEC_X86);
+ break;
+ case llvm::Triple::riscv64:
+ case llvm::Triple::riscv32:
+ addVectorizableFunctions(VecFuncs_LIBMVEC_RVV);
+ break;
+ }
break;
}
case MASSV: {
diff --git a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
index 2d74a91f62dc07..e368a15a09bf34 100644
--- a/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
+++ b/llvm/lib/Frontend/Driver/CodeGenOptions.cpp
@@ -23,7 +23,7 @@ TargetLibraryInfoImpl *createTLII(llvm::Triple &TargetTriple,
TargetTriple);
break;
case VectorLibrary::LIBMVEC:
- TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC_X86,
+ TLII->addVectorizableFunctionsFromVecLib(TargetLibraryInfoImpl::LIBMVEC,
TargetTriple);
break;
case VectorLibrary::MASSV:
diff --git a/llvm/lib/IR/VFABIDemangler.cpp b/llvm/lib/IR/VFABIDemangler.cpp
index 897583084bf38c..07befb7b89b978 100644
--- a/llvm/lib/IR/VFABIDemangler.cpp
+++ b/llvm/lib/IR/VFABIDemangler.cpp
@@ -38,11 +38,19 @@ static ParseRet tryParseISA(StringRef &MangledName, VFISAKind &ISA) {
if (MangledName.consume_front(VFABI::_LLVM_)) {
ISA = VFISAKind::LLVM;
+ } else if (MangledName.consume_front("r")) {
+ ISA = StringSwitch<VFISAKind>(MangledName.take_front(1))
+ .Case("1", VFISAKind::RVVM1)
+ .Case("2", VFISAKind::RVVM2)
+ .Case("4", VFISAKind::RVVM4)
+ .Case("8", VFISAKind::RVVM8)
+ .Default(VFISAKind::RVV);
+ if (ISA != VFISAKind::RVV)
+ MangledName = MangledName.drop_front(1);
} else {
ISA = StringSwitch<VFISAKind>(MangledName.take_front(1))
.Case("n", VFISAKind::AdvancedSIMD)
.Case("s", VFISAKind::SVE)
- .Case("r", VFISAKind::RVV)
.Case("b", VFISAKind::SSE)
.Case("c", VFISAKind::AVX)
.Case("d", VFISAKind::AVX2)
@@ -79,8 +87,10 @@ static ParseRet tryParseMask(StringRef &MangledName, bool &IsMasked) {
static ParseRet tryParseVLEN(StringRef &ParseString, VFISAKind ISA,
std::pair<unsigned, bool> &ParsedVF) {
if (ParseString.consume_front("x")) {
- // SVE is the only scalable ISA currently supported.
- if (ISA != VFISAKind::SVE && ISA != VFISAKind::RVV) {
+ // SVE/RVV is the only two scalable ISAs currently supported.
+ if (ISA != VFISAKind::SVE && ISA != VFISAKind::RVV &&
+ ISA != VFISAKind::RVVM1 && ISA != VFISAKind::RVVM2 &&
+ ISA != VFISAKind::RVVM4 && ISA != VFISAKind::RVVM8) {
LLVM_DEBUG(dbgs() << "Vector function variant declared with scalable VF "
<< "but ISA supported for SVE and RVV only\n");
return ParseRet::Error;
@@ -302,17 +312,52 @@ static ParseRet tryParseAlign(StringRef &ParseString, Align &Alignment) {
// the number of elements of the given type which would fit in such a vector.
static std::optional<ElementCount> getElementCountForTy(const VFISAKind ISA,
const Type *Ty) {
- assert((ISA == VFISAKind::SVE || ISA == VFISAKind::RVV) &&
+ // Only AArch64 SVE and RVV are supported at present.
+ assert((ISA == VFISAKind::SVE || ISA == VFISAKind::RVV ||
+ ISA == VFISAKind::RVVM1 || ISA == VFISAKind::RVVM2 ||
+ ISA == VFISAKind::RVVM4 || ISA == VFISAKind::RVVM8) &&
"Scalable VF decoding only implemented for SVE and RVV\n");
- if (Ty->isIntegerTy(64) || Ty->isDoubleTy() || Ty->isPointerTy())
- return ElementCount::getScalable(2);
- if (Ty->isIntegerTy(32) || Ty->isFloatTy())
- return ElementCount::getScalable(4);
- if (Ty->isIntegerTy(16) || Ty->is16bitFPTy())
- return ElementCount::getScalable(8);
- if (Ty->isIntegerTy(8))
- return ElementCount::getScalable(16);
+ if (ISA == VFISAKind::SVE || ISA == VFISAKind::RVV) {
+ if (Ty->isIntegerTy(64) || Ty->isDoubleTy() || Ty->isPointerTy())
+ return ElementCount::getScalable(2);
+ if (Ty->isIntegerTy(32) || Ty->isFloatTy())
+ return ElementCount::getScalable(4);
+ if (Ty->isIntegerTy(16) || Ty->is16bitFPTy())
+ return ElementCount::getScalable(8);
+ if (Ty->isIntegerTy(8))
+ return ElementCount::getScalable(16);
+ } else if (ISA == VFISAKind::RVVM1 || ISA == VFISAKind::RVVM2 ||
+ ISA == VFISAKind::RVVM4 || ISA == VFISAKind::RVVM8) {
+ // Because 'vscale = VLENB/8', so the ElementCount should be
+ // 'vscale x (LMUL * 64 / sizeof(Type))'.
+ unsigned Number = 1;
+ unsigned LMUL = 1;
+ unsigned ElemCount;
+
+ // TODO: need to distingush rv32 and rv64.
+ if (Ty->isPointerTy())
+ return std::nullopt;
+
+ if (Ty->isIntegerTy(64) || Ty->isDoubleTy())
+ Number = 1;
+ if (Ty->isIntegerTy(32) || Ty->isFloatTy())
+ Number = 2;
+ if (Ty->isIntegerTy(16) || Ty->is16bitFPTy())
+ Number = 4;
+ if (Ty->isIntegerTy(8))
+ Number = 8;
+
+ if (ISA == VFISAKind::RVVM2)
+ LMUL = 2;
+ else if (ISA == VFISAKind::RVVM4)
+ LMUL = 4;
+ else if (ISA == VFISAKind::RVVM8)
+ LMUL = 8;
+
+ ElemCount = LMUL * Number;
+ return ElementCount::getScalable(ElemCount);
+ }
return std::nullopt;
}
diff --git a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
index 92b5d444aac340..8456e4294083bf 100644
--- a/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
+++ b/llvm/lib/Transforms/Utils/InjectTLIMappings.cpp
@@ -109,11 +109,11 @@ static void addMappingsFromTLI(const TargetLibraryInfo &TLI, CallInst &CI) {
TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
for (bool Predicated : {false, true}) {
- for (ElementCount VF = ElementCount::getFixed(2);
+ for (ElementCount VF = ElementCount::getFixed(1);
ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
AddVariantDecl(VF, Predicated);
- for (ElementCount VF = ElementCount::getScalable(2);
+ for (ElementCount VF = ElementCount::getScalable(1);
ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
AddVariantDecl(VF, Predicated);
}
diff --git a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
index fde6cb788b46f9..1bd929f836f9ed 100644
--- a/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
+++ b/llvm/test/CodeGen/Generic/replace-intrinsics-with-veclib.ll
@@ -1,7 +1,7 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes
; RUN: opt -vector-library=SVML -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,SVML
; RUN: opt -vector-library=AMDLIBM -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,AMDLIBM
-; RUN: opt -vector-library=LIBMVEC-X86 -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86
+; RUN: opt -vector-library=LIBMVEC -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC
; RUN: opt -vector-library=MASSV -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV
; RUN: opt -vector-library=Accelerate -replace-with-veclib -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE
@@ -19,10 +19,10 @@ define <4 x double> @exp_v4(<4 x double> %in) {
; AMDLIBM-NEXT: [[TMP1:%.*]] = call <4 x double> @amd_vrd4_exp(<4 x double> [[IN]])
; AMDLIBM-NEXT: ret <4 x double> [[TMP1]]
;
-; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_v4
-; LIBMVEC-X86-SAME: (<4 x double> [[IN:%.*]]) {
-; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> [[IN]])
-; LIBMVEC-X86-NEXT: ret <4 x double> [[TMP1]]
+; LIBMVEC-LABEL: define {{[^@]+}}@exp_v4
+; LIBMVEC-SAME: (<4 x double> [[IN:%.*]]) {
+; LIBMVEC-NEXT: [[TMP1:%.*]] = call <4 x double> @_ZGVdN4v_exp(<4 x double> [[IN]])
+; LIBMVEC-NEXT: ret <4 x double> [[TMP1]]
;
; MASSV-LABEL: define {{[^@]+}}@exp_v4
; MASSV-SAME: (<4 x double> [[IN:%.*]]) {
@@ -51,10 +51,10 @@ define <4 x float> @exp_f32(<4 x float> %in) {
; AMDLIBM-NEXT: [[TMP1:%.*]] = call <4 x float> @amd_vrs4_expf(<4 x float> [[IN]])
; AMDLIBM-NEXT: ret <4 x float> [[TMP1]]
;
-; LIBMVEC-X86-LABEL: define {{[^@]+}}@exp_f32
-; LIBMVEC-X86-SAME: (<4 x float> [[IN:%.*]]) {
-; LIBMVEC-X86-NEXT: [[TMP1:%.*]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> [[IN]])
-; LIBMVEC-X86-NEXT: ret <4 x float> [[TMP1]]
+; LIBMVEC-LABEL: define {{[^@]+}}@exp_f32
+; LIBMVEC-SAME: (<4 x float> [[IN:%.*]]) {
+; LIBMVEC-NEXT: [[TMP1:%.*]] = call <4 x float> @_ZGVbN4v_expf(<4 x float> [[IN]])
+; LIBMVEC-NEXT: ret <4 x float> [[TMP1]]
;
; MASSV-LABEL: define {{[^@]+}}@exp_f32
; MASSV-SAME: (<4 x float> [[IN:%.*]]) {
diff --git a/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll b/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll
index 75fdd00e25f988..bbcd8b322cbbd7 100644
--- a/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/RISCV/libm-vector-calls.ll
@@ -1,5 +1,5 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
-; RUN: opt -mtriple=riscv64 -mattr=+v -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+; RUN: opt -mtriple=riscv64 -mattr=+v -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-linux-gnu"
@@ -8,37 +8,50 @@ define void @sin_f64(ptr nocapture %varray) {
; CHECK-LABEL: define void @sin_f64(
; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 1, [[TMP9]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP10]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double>
-; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double>
-; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP12:%.*]] = sitofp <vscale x 2 x i32> [[VEC_IND]] to <vscale x 2 x double>
+; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGV2Nxv_sin(<vscale x 2 x double> [[TMP12]])
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4
-; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4
-; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK-NEXT: store <vscale x 2 x double> [[TMP13]], ptr [[TMP15]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double
-; CHECK-NEXT: [[CALL:%.*]] = tail call double @sin(double [[CONV]]) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @sin(double [[CONV]]) #[[ATTR4:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -73,37 +86,50 @@ define void @sin_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: define void @sin_f64_intrinsic(
; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 1, [[TMP9]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP10]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double>
-; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double>
-; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_sin(<4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP12:%.*]] = sitofp <vscale x 2 x i32> [[VEC_IND]] to <vscale x 2 x double>
+; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGV2Nxv_sin(<vscale x 2 x double> [[TMP12]])
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4
-; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4
-; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
+; CHECK-NEXT: store <vscale x 2 x double> [[TMP13]], ptr [[TMP15]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double
-; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV]]) #[[ATTR4:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.sin.f64(double [[CONV]]) #[[ATTR5:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -137,37 +163,50 @@ define void @cos_f64(ptr nocapture %varray) {
; CHECK-LABEL: define void @cos_f64(
; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 1, [[TMP9]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP10]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double>
-; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double>
-; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP12:%.*]] = sitofp <vscale x 2 x i32> [[VEC_IND]] to <vscale x 2 x double>
+; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGV2Nxv_cos(<vscale x 2 x double> [[TMP12]])
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4
-; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4
-; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; CHECK-NEXT: store <vscale x 2 x double> [[TMP13]], ptr [[TMP15]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double
-; CHECK-NEXT: [[CALL:%.*]] = tail call double @cos(double [[CONV]]) #[[ATTR5:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @cos(double [[CONV]]) #[[ATTR6:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -201,37 +240,50 @@ define void @cos_f64_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: define void @cos_f64_intrinsic(
; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 2
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 2
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 2
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 2 x i32> @llvm.stepvector.nxv2i32()
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 2 x i32> [[TMP7]], splat (i32 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 2 x i32> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 1, [[TMP9]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 2 x i32> poison, i32 [[TMP10]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 2 x i32> [[DOTSPLATINSERT]], <vscale x 2 x i32> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 2 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = sitofp <4 x i32> [[VEC_IND]] to <4 x double>
-; CHECK-NEXT: [[TMP2:%.*]] = sitofp <4 x i32> [[STEP_ADD]] to <4 x double>
-; CHECK-NEXT: [[TMP3:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = call <4 x double> @_ZGVdN4v_cos(<4 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP12:%.*]] = sitofp <vscale x 2 x i32> [[VEC_IND]] to <vscale x 2 x double>
+; CHECK-NEXT: [[TMP13:%.*]] = call <vscale x 2 x double> @_ZGV2Nxv_cos(<vscale x 2 x double> [[TMP12]])
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i32 4
-; CHECK-NEXT: store <4 x double> [[TMP3]], ptr [[TMP15]], align 4
-; CHECK-NEXT: store <4 x double> [[TMP4]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
-; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1000
-; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; CHECK-NEXT: store <vscale x 2 x double> [[TMP13]], ptr [[TMP15]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 true, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 1000, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[IV]] to i32
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to double
-; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV]]) #[[ATTR6:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call double @llvm.cos.f64(double [[CONV]]) #[[ATTR7:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[VARRAY]], i64 [[IV]]
; CHECK-NEXT: store double [[CALL]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
@@ -265,37 +317,50 @@ define void @exp_f32(ptr nocapture %varray) {
; CHECK-LABEL: define void @exp_f32(
; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP7]], splat (i32 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 1, [[TMP9]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[VEC_IND]] to <8 x float>
-; CHECK-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[STEP_ADD]] to <8 x float>
-; CHECK-NEXT: [[TMP3:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP2]])
+; CHECK-NEXT: [[TMP12:%.*]] = sitofp <vscale x 4 x i32> [[VEC_IND]] to <vscale x 4 x float>
+; CHECK-NEXT: [[TMP13:%.*]] = call fast <vscale x 4 x float> @_ZGV2Nxv_expf(<vscale x 4 x float> [[TMP12]])
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 8
-; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[TMP15]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP4]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[STEP_ADD]], splat (i32 8)
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
-; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP15]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 992, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float
-; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @expf(float [[CONV]]) #[[ATTR7:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @expf(float [[CONV]]) #[[ATTR8:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
@@ -329,37 +394,50 @@ define void @exp_f32_intrin(ptr nocapture %varray) {
; CHECK-LABEL: define void @exp_f32_intrin(
; CHECK-SAME: ptr nocapture [[VARRAY:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[ENTRY:.*]]:
-; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
+; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 1000, [[TMP1]]
+; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 1000, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 1000, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul i64 [[TMP4]], 4
+; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i32> @llvm.stepvector.nxv4i32()
+; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 4 x i32> [[TMP6]], zeroinitializer
+; CHECK-NEXT: [[TMP8:%.*]] = mul <vscale x 4 x i32> [[TMP7]], splat (i32 1)
+; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i32> zeroinitializer, [[TMP8]]
+; CHECK-NEXT: [[TMP9:%.*]] = trunc i64 [[TMP5]] to i32
+; CHECK-NEXT: [[TMP10:%.*]] = mul i32 1, [[TMP9]]
+; CHECK-NEXT: [[DOTSPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP10]], i64 0
+; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[DOTSPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
; CHECK: [[VECTOR_BODY]]:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[VEC_IND:%.*]] = phi <8 x i32> [ <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
-; CHECK-NEXT: [[STEP_ADD:%.*]] = add <8 x i32> [[VEC_IND]], splat (i32 8)
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <vscale x 4 x i32> [ [[INDUCTION]], %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 0
-; CHECK-NEXT: [[TMP1:%.*]] = sitofp <8 x i32> [[VEC_IND]] to <8 x float>
-; CHECK-NEXT: [[TMP2:%.*]] = sitofp <8 x i32> [[STEP_ADD]] to <8 x float>
-; CHECK-NEXT: [[TMP3:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP1]])
-; CHECK-NEXT: [[TMP4:%.*]] = call fast <8 x float> @_ZGVdN8v_expf(<8 x float> [[TMP2]])
+; CHECK-NEXT: [[TMP12:%.*]] = sitofp <vscale x 4 x i32> [[VEC_IND]] to <vscale x 4 x float>
+; CHECK-NEXT: [[TMP13:%.*]] = call fast <vscale x 4 x float> @_ZGV2Nxv_expf(<vscale x 4 x float> [[TMP12]])
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[TMP11]]
; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 0
-; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds float, ptr [[TMP14]], i32 8
-; CHECK-NEXT: store <8 x float> [[TMP3]], ptr [[TMP15]], align 4
-; CHECK-NEXT: store <8 x float> [[TMP4]], ptr [[TMP7]], align 4
-; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
-; CHECK-NEXT: [[VEC_IND_NEXT]] = add <8 x i32> [[STEP_ADD]], splat (i32 8)
-; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 992
-; CHECK-NEXT: br i1 [[TMP8]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP13]], ptr [[TMP15]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i32> [[VEC_IND]], [[DOTSPLAT]]
+; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
; CHECK: [[MIDDLE_BLOCK]]:
-; CHECK-NEXT: br i1 false, label %[[FOR_END:.*]], label %[[SCALAR_PH]]
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 1000, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], label %[[FOR_END:.*]], label %[[SCALAR_PH]]
; CHECK: [[SCALAR_PH]]:
-; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 992, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
+; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
; CHECK-NEXT: br label %[[FOR_BODY:.*]]
; CHECK: [[FOR_BODY]]:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY]] ]
; CHECK-NEXT: [[TMP:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP]] to float
-; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @llvm.exp.f32(float [[CONV]]) #[[ATTR8:[0-9]+]]
+; CHECK-NEXT: [[CALL:%.*]] = tail call fast float @llvm.exp.f32(float [[CONV]]) #[[ATTR9:[0-9]+]]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, ptr [[VARRAY]], i64 [[INDVARS_IV]]
; CHECK-NEXT: store float [[CALL]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
index 67a2cf2b80e70c..91d5c52fa43c6b 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-VF2-VF8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+; RUN: opt -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll
index d0d0d78a0d27e2..bdb89fbbaa8473 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls-finite.ll
@@ -1,4 +1,4 @@
-; RUN: opt -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+; RUN: opt -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
index 7a0e44c9e99161..e0661301fd90c2 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/libm-vector-calls.ll
@@ -1,4 +1,4 @@
-; RUN: opt -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
+; RUN: opt -vector-library=LIBMVEC -passes=inject-tli-mappings,loop-vectorize -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/Util/add-TLI-mappings.ll b/llvm/test/Transforms/Util/add-TLI-mappings.ll
index 39caf008f924f1..dd0f921cf315a0 100644
--- a/llvm/test/Transforms/Util/add-TLI-mappings.ll
+++ b/llvm/test/Transforms/Util/add-TLI-mappings.ll
@@ -1,7 +1,7 @@
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=SVML -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SVML
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=AMDLIBM -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,AMDLIBM
; RUN: opt -mtriple=powerpc64-unknown-linux-gnu -vector-library=MASSV -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,MASSV
-; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=LIBMVEC-X86 -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC-X86
+; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=LIBMVEC -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,LIBMVEC
; RUN: opt -mtriple=x86_64-unknown-linux-gnu -vector-library=Accelerate -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,ACCELERATE
; RUN: opt -mtriple=aarch64-unknown-linux-gnu -vector-library=sleefgnuabi -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SLEEFGNUABI
; RUN: opt -mtriple=riscv64-unknown-linux-gnu -vector-library=sleefgnuabi -passes=inject-tli-mappings -S < %s | FileCheck %s --check-prefixes=COMMON,SLEEFGNUABI_RISCV
@@ -32,9 +32,9 @@
; MASSV-SAME: ptr @__log10f4
; ACCELERATE-SAME: [1 x ptr] [
; ACCELERATE-SAME: ptr @vlog10f
-; LIBMVEC-X86-SAME: [2 x ptr] [
-; LIBMVEC-X86-SAME: ptr @_ZGVbN2v_sin,
-; LIBMVEC-X86-SAME: ptr @_ZGVdN4v_sin
+; LIBMVEC-SAME: [2 x ptr] [
+; LIBMVEC-SAME: ptr @_ZGVbN2v_sin,
+; LIBMVEC-SAME: ptr @_ZGVdN4v_sin
; SLEEFGNUABI-SAME: [16 x ptr] [
; SLEEFGNUABI-SAME: ptr @_ZGVnN2vl8_modf,
; SLEEFGNUABI-SAME: ptr @_ZGVsNxvl8_modf,
@@ -97,7 +97,7 @@ define double @sin_f64(double %in) {
; AMDLIBM: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
; MASSV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
; ACCELERATE: call double @sin(double %{{.*}})
-; LIBMVEC-X86: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
+; LIBMVEC: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
; SLEEFGNUABI: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
; SLEEFGNUABI_RISCV: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
; ARMPL: call double @sin(double %{{.*}}) #[[SIN:[0-9]+]]
@@ -155,7 +155,7 @@ define float @call_llvm.log10.f32(float %in) {
; COMMON-LABEL: @call_llvm.log10.f32(
; SVML: call float @llvm.log10.f32(float %{{.*}})
; AMDLIBM: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
-; LIBMVEC-X86: call float @llvm.log10.f32(float %{{.*}})
+; LIBMVEC: call float @llvm.log10.f32(float %{{.*}})
; MASSV: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
; ACCELERATE: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
; SLEEFGNUABI: call float @llvm.log10.f32(float %{{.*}}) #[[LOG10:[0-9]+]]
@@ -164,7 +164,7 @@ define float @call_llvm.log10.f32(float %in) {
; No mapping of "llvm.log10.f32" to a vector function for SVML.
; SVML-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
; AMDLIBM-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
-; LIBMVEC-X86-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
+; LIBMVEC-NOT: _ZGV_LLVM_{{.*}}_llvm.log10.f32({{.*}})
%call = tail call float @llvm.log10.f32(float %in)
ret float %call
}
@@ -193,8 +193,8 @@ declare float @llvm.log10.f32(float) #0
; MASSV: declare <2 x double> @__sind2(<2 x double>)
; MASSV: declare <4 x float> @__log10f4(<4 x float>)
-; LIBMVEC-X86: declare <2 x double> @_ZGVbN2v_sin(<2 x double>)
-; LIBMVEC-X86: declare <4 x double> @_ZGVdN4v_sin(<4 x double>)
+; LIBMVEC: declare <2 x double> @_ZGVbN2v_sin(<2 x double>)
+; LIBMVEC: declare <4 x double> @_ZGVdN4v_sin(<4 x double>)
; ACCELERATE: declare <4 x float> @vlog10f(<4 x float>)
@@ -266,9 +266,9 @@ attributes #0 = { nounwind readnone }
; ACCELERATE: attributes #[[LOG10]] = { "vector-function-abi-variant"=
; ACCELERATE-SAME: "_ZGV_LLVM_N4v_llvm.log10.f32(vlog10f)" }
-; LIBMVEC-X86: attributes #[[SIN]] = { "vector-function-abi-variant"=
-; LIBMVEC-X86-SAME: "_ZGV_LLVM_N2v_sin(_ZGVbN2v_sin),
-; LIBMVEC-X86-SAME: _ZGV_LLVM_N4v_sin(_ZGVdN4v_sin)" }
+; LIBMVEC: attributes #[[SIN]] = { "vector-function-abi-variant"=
+; LIBMVEC-SAME: "_ZGV_LLVM_N2v_sin(_ZGVbN2v_sin),
+; LIBMVEC-SAME: _ZGV_LLVM_N4v_sin(_ZGVdN4v_sin)" }
; SLEEFGNUABI: attributes #[[MODF]] = { "vector-function-abi-variant"=
; SLEEFGNUABI-SAME: "_ZGV_LLVM_N2vl8_modf(_ZGVnN2vl8_modf),
More information about the llvm-commits
mailing list