[llvm] Adding more vector calls for -fveclib=AMDLIBM (PR #109662)

Thu Oct 17 22:06:36 PDT 2024

================
@@ -1600,4 +1653,163 @@ for.end:
   ret void
 }
 
+define void @exp10_f64(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f64(
+; CHECK-VF2:    [[TMP5:%.*]] = call <2 x double> @amd_vrd2_exp10(<2 x double> [[TMP4:%.*]])
+; CHECK-VF4-NOT:    call <4 x double> @amd_vrd4_exp10(<4 x double> [[TMP4:%.*]])
+; CHECK-VF8-NOT:    call <8 x double> @amd_vrd8_exp10(<8 x double> [[TMP4:%.*]])
+; CHECK:        ret void
+;
+entry:
+  br label %for.body
+
+for.body:
+  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
+  %tmp = trunc i64 %iv to i32
+  %conv = sitofp i32 %tmp to double
+  %call = tail call double @exp10(double %conv)
+  %arrayidx = getelementptr inbounds double, ptr %varray, i64 %iv
+  store double %call, ptr %arrayidx, align 4
+  %iv.next = add nuw nsw i64 %iv, 1
+  %exitcond = icmp eq i64 %iv.next, 1000
+  br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+  ret void
+}
+
+define void @exp10_f32(ptr nocapture %varray) {
+; CHECK-LABEL: @exp10_f32(
+; CHECK-VF4:    [[TMP5:%.*]] = call <4 x float> @amd_vrs4_exp10f(<4 x float> [[TMP4:%.*]])
+; CHECK-VF8-NOT:    [[TMP5:%.*]] = call <8 x float> @amd_vrs8_exp10f(<8 x float> [[TMP4:%.*]])
----------------
rohitaggarwal007 wrote:

Now exp10 is generated as llvm.exp10.v8f32 for VF=8 and respectively 

https://github.com/llvm/llvm-project/pull/109662