[llvm] Adding more vector calls for -fveclib=AMDLIBM (PR #109662)
Rohit Aggarwal via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 17 22:06:36 PDT 2024
================
@@ -1600,4 +1653,163 @@ for.end:
ret void
}
+define void @exp10_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f64(
+; CHECK-VF2: [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT: call <4 x double> @amd_vrd4_exp10(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8-NOT: call <8 x double> @amd_vrd8_exp10(<8 x double> [[TMP4:%.*]])
+; CHECK: ret void
+;
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+ %tmp = trunc i64 %iv to i32
+ %conv = sitofp i32 %tmp to double
+ %call = tail call double @exp10(double %conv)
+ %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+ store double %call, ptr %arrayidx, align 4
+ %iv.next = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %iv.next, 1000
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
+
+define void @exp10_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f32(
+; CHECK-VF4: [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8-NOT: [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp10f(<8 x float> [[TMP4:%.*]])
----------------
rohitaggarwal007 wrote:
exp10 is now generated as the llvm.exp10.v8f32 intrinsic for VF=8, and as the corresponding llvm.exp10 vector intrinsic for the other vectorization factors.
https://github.com/llvm/llvm-project/pull/109662
More information about the llvm-commits
mailing list