[llvm] 4d412be - [LoopVectorize][X86] amdlibm-calls.ll - add missing sinh and f64 test coverage to all functions
Simon Pilgrim via llvm-commits
llvm-commits at lists.llvm.org
Sat Aug 31 03:48:33 PDT 2024
Author: Simon Pilgrim
Date: 2024-08-31T11:48:22+01:00
New Revision: 4d412bedcc97125896d58c27e63a977fdc856901
URL: https://github.com/llvm/llvm-project/commit/4d412bedcc97125896d58c27e63a977fdc856901
DIFF: https://github.com/llvm/llvm-project/commit/4d412bedcc97125896d58c27e63a977fdc856901.diff
LOG: [LoopVectorize][X86] amdlibm-calls.ll - add missing sinh and f64 test coverage to all functions
Shows the failure to vectorise the acos/asin/atan and cosh/sinh/tanh libcalls when they don't have a corresponding veclib mapping.
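For context: each CHECK-VFn prefix corresponds to a run of the loop vectorizer with that vector width forced, so the new CHECK-VFn-NOT lines assert that no vector call is emitted at widths with no AMD libm mapping. The RUN lines sit outside this diff's hunks; a representative one looks roughly like the following (the exact flag set is an assumption based on the sibling veclib tests, not part of this patch):

; RUN: opt -vector-library=AMDLIBM -passes=inject-tli-mappings,loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S < %s | FileCheck %s --check-prefixes=CHECK,CHECK-VF4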
Added:
Modified:
llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
Removed:
################################################################################
diff --git a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
index c051e2f18380bd..32a6950e205108 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/amdlibm-calls.ll
@@ -21,7 +21,9 @@ declare float @tanf(float) #0
declare double @llvm.tan.f64(double) #0
declare float @llvm.tan.f32(float) #0
+declare double @acos(double) #0
declare float @acosf(float) #0
+declare double @llvm.acos.f64(double) #0
declare float @llvm.acos.f32(float) #0
declare double @asin(double) #0
@@ -34,12 +36,19 @@ declare float @atanf(float) #0
declare double @llvm.atan.f64(double) #0
declare float @llvm.atan.f32(float) #0
+declare double @sinh(double) #0
+declare float @sinhf(float) #0
+declare double @llvm.sinh.f64(double) #0
+declare float @llvm.sinh.f32(float) #0
+
declare double @cosh(double) #0
declare float @coshf(float) #0
declare double @llvm.cosh.f64(double) #0
declare float @llvm.cosh.f32(float) #0
+declare double @tanh(double) #0
declare float @tanhf(float) #0
+declare double @llvm.tanh.f64(double) #0
declare float @llvm.tanh.f32(float) #0
declare double @pow(double, double) #0
@@ -387,8 +396,35 @@ for.end:
ret void
}
+define void @acos_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @acos_f64(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8-NOT: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @acos(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @acos_f32(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_acosf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_acosf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_acosf(<16 x float> [[TMP4:%.*]])
@@ -412,6 +448,32 @@ for.end:
ret void
}
+define void @acos_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @acos_f64_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.acos.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.acos.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.acos.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.acos.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.acos.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @acos_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @acos_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.acos.v2f32(<2 x float> [[TMP4:%.*]])
@@ -440,7 +502,10 @@ for.end:
define void @asin_f64(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f64(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x double> @llvm.asin.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT: [[TMP5:%.*]] = call <4 x double> @llvm.asin.v4f64(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_asin(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT: [[TMP5:%.*]] = call <16 x double> @llvm.asin.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -463,6 +528,7 @@ for.end:
define void @asin_f32(ptr nocapture %varray) {
; CHECK-LABEL: @asin_f32(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x float> @llvm.asin.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_asinf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_asinf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_asinf(<16 x float> [[TMP4:%.*]])
@@ -543,6 +609,7 @@ define void @atan_f64(ptr nocapture %varray) {
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_atan(<2 x double> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @amd_vrd4_atan(<4 x double> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @amd_vrd8_atan(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT: [[TMP5:%.*]] = call <16 x double> @llvm.atan.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -565,6 +632,7 @@ for.end:
define void @atan_f32(ptr nocapture %varray) {
; CHECK-LABEL: @atan_f32(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x float> @llvm.atan.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_atanf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_atanf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_atanf(<16 x float> [[TMP4:%.*]])
@@ -640,9 +708,116 @@ for.end:
ret void
}
+define void @sinh_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @sinh_f64(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT: [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8-NOT: [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT: [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @sinh(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @sinh_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @sinh_f32(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF4-NOT: [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8-NOT: [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16-NOT: [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @sinhf(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @sinh_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @sinh_f64_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.sinh.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.sinh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.sinh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.sinh.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.sinh.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @sinh_f32_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @sinh_f32_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.sinh.v2f32(<2 x float> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @llvm.sinh.v4f32(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @llvm.sinh.v8f32(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @llvm.sinh.v16f32(<16 x float> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to float
+ %call = tail call float @llvm.sinh.f32(float %conv)
+ %arrayidx = getelementptr inbounds float, ptr %varray, i64 %iv
+ store float %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @cosh_f64(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f64(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_cosh(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT: [[TMP5:%.*]] = call <4 x double> @llvm.cosh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8-NOT: [[TMP5:%.*]] = call <8 x double> @llvm.cosh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT: [[TMP5:%.*]] = call <16 x double> @llvm.cosh.v16f64(<16 x double> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -665,8 +840,10 @@ for.end:
define void @cosh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @cosh_f32(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x float> @llvm.cosh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_coshf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_coshf(<8 x float> [[TMP4:%.*]])
+; CHECK-VF16-NOT: [[TMP5:%.*]] = call <16 x float> @llvm.cosh.v16f32(<16 x float> [[TMP4:%.*]])
; CHECK: ret void
;
entry:
@@ -739,8 +916,35 @@ for.end:
ret void
}
+define void @tanh_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @tanh_f64(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT: [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8-NOT: [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16-NOT: [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @tanh(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @tanh_f32(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32(
+; CHECK-VF2-NOT: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])
; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_tanhf(<4 x float> [[TMP4:%.*]])
; CHECK-VF8: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_tanhf(<8 x float> [[TMP4:%.*]])
; CHECK-VF16: [[TMP5:%.*]] = call <16 x float> @amd_vrs16_tanhf(<16 x float> [[TMP4:%.*]])
@@ -764,6 +968,32 @@ for.end:
ret void
}
+define void @tanh_f64_intrinsic(ptr nocapture %varray) {
+; CHECK-LABEL: @tanh_f64_intrinsic(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @llvm.tanh.v2f64(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x double> @llvm.tanh.v4f64(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8: [[TMP5:%.*]] = call <8 x double> @llvm.tanh.v8f64(<8 x double> [[TMP4:%.*]])
+; CHECK-VF16: [[TMP5:%.*]] = call <16 x double> @llvm.tanh.v16f64(<16 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @llvm.tanh.f64(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
define void @tanh_f32_intrinsic(ptr nocapture %varray) {
; CHECK-LABEL: @tanh_f32_intrinsic(
; CHECK-VF2: [[TMP5:%.*]] = call <2 x float> @llvm.tanh.v2f32(<2 x float> [[TMP4:%.*]])